From 7ecdb12de094d31dd9d59fd3ae29e6d6fa0e2108 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 31 Dec 2024 08:52:24 +0100 Subject: [PATCH 01/76] added unittests for mo_lib_loopindices --- test/fortran/test_loopindices.f90 | 145 ++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 test/fortran/test_loopindices.f90 diff --git a/test/fortran/test_loopindices.f90 b/test/fortran/test_loopindices.f90 new file mode 100644 index 0000000..3c20630 --- /dev/null +++ b/test/fortran/test_loopindices.f90 @@ -0,0 +1,145 @@ +! ICON +! +! --------------------------------------------------------------- +! Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +! Contact information: icon-model.org +! +! See AUTHORS.TXT for a list of authors +! See LICENSES/ for license information +! SPDX-License-Identifier: BSD-3-Clause +! --------------------------------------------------------------- + +MODULE TEST_mo_lib_loopindices + USE FORTUTF + + IMPLICIT NONE + + PRIVATE + + PUBLIC :: TEST_get_indices_lib + +CONTAINS + + SUBROUTINE TEST_get_indices_lib + USE mo_lib_loopindices, ONLY: get_indices_c_lib, get_indices_e_lib, get_indices_v_lib + + INTEGER :: i_startidx_in ! Start index as input + INTEGER :: i_endidx_in ! End index as input + INTEGER :: nproma ! inner loop length/vector length + INTEGER :: i_blk ! Current block (variable jb in do loops) + INTEGER :: i_startblk ! Start block of do loop + INTEGER :: i_endblk ! End block of do loop + + INTEGER :: i_startidx_out, i_endidx_out ! Start and end indices (jc loop), as output + + i_startidx_in = 2 + i_endidx_in = 15 + nproma = 32 + i_startblk = 1 + i_endblk = 40 + + ! 
CASE: I -> i_blk == i_startblk + i_blk = 1 + CALL get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) + + CALL TAG_TEST("TEST_get_indices_c_start_1") + CALL ASSERT_EQUAL(i_startidx_out, MAX(1, i_startidx_in)) + CALL TAG_TEST("TEST_get_indices_c_end_1") + CALL ASSERT_EQUAL(i_endidx_out, nproma) + + i_startidx_out = 0 + i_endidx_out = 0 + + CALL get_indices_e_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) + + CALL TAG_TEST("TEST_get_indices_e_start_1") + CALL ASSERT_EQUAL(i_startidx_out, MAX(1, i_startidx_in)) + CALL TAG_TEST("TEST_get_indices_e_end_1") + CALL ASSERT_EQUAL(i_endidx_out, nproma) + + i_startidx_out = 0 + i_endidx_out = 0 + + CALL get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) + + CALL TAG_TEST("TEST_get_indices_v_start_1") + CALL ASSERT_EQUAL(i_startidx_out, MAX(1, i_startidx_in)) + CALL TAG_TEST("TEST_get_indices_v_end_1") + CALL ASSERT_EQUAL(i_endidx_out, nproma) + + i_startidx_out = 0 + i_endidx_out = 0 + + ! 
CASE: II -> i_blk == i_endblk + i_blk = 40 + CALL get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) + + CALL TAG_TEST("TEST_get_indices_c_start_2") + CALL ASSERT_EQUAL(i_startidx_out, 1) + CALL TAG_TEST("TEST_get_indices_c_end_2") + CALL ASSERT_EQUAL(i_endidx_out, i_endidx_in) + + i_startidx_out = 0 + i_endidx_out = 0 + + CALL get_indices_e_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) + + CALL TAG_TEST("TEST_get_indices_e_start_2") + CALL ASSERT_EQUAL(i_startidx_out, 1) + CALL TAG_TEST("TEST_get_indices_e_end_2") + CALL ASSERT_EQUAL(i_endidx_out, i_endidx_in) + + i_startidx_out = 0 + i_endidx_out = 0 + + CALL get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) + + CALL TAG_TEST("TEST_get_indices_v_start_2") + CALL ASSERT_EQUAL(i_startidx_out, 1) + CALL TAG_TEST("TEST_get_indices_v_end_2") + CALL ASSERT_EQUAL(i_endidx_out, i_endidx_in) + + i_startidx_out = 0 + i_endidx_out = 0 + + ! 
CASE: III -> Every other cases + i_blk = 20 + CALL get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) + + CALL TAG_TEST("TEST_get_indices_c_start_3") + CALL ASSERT_EQUAL(i_startidx_out, 1) + CALL TAG_TEST("TEST_get_indices_c_end_3") + CALL ASSERT_EQUAL(i_endidx_out, nproma) + + i_startidx_out = 0 + i_endidx_out = 0 + + CALL get_indices_e_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) + + CALL TAG_TEST("TEST_get_indices_e_start_3") + CALL ASSERT_EQUAL(i_startidx_out, 1) + CALL TAG_TEST("TEST_get_indices_e_end_3") + CALL ASSERT_EQUAL(i_endidx_out, nproma) + + i_startidx_out = 0 + i_endidx_out = 0 + + CALL get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) + + CALL TAG_TEST("TEST_get_indices_v_start_3") + CALL ASSERT_EQUAL(i_startidx_out, 1) + CALL TAG_TEST("TEST_get_indices_v_end_3") + CALL ASSERT_EQUAL(i_endidx_out, nproma) + + END SUBROUTINE TEST_get_indices_lib + +END MODULE TEST_mo_lib_loopindices -- GitLab From 71f4f33333004c8e598904c8911ab706ca9c5020 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 31 Dec 2024 08:53:01 +0100 Subject: [PATCH 02/76] added unittests for mo_math_utilities, partially done --- test/fortran/test_math_utilities.f90 | 270 +++++++++++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100644 test/fortran/test_math_utilities.f90 diff --git a/test/fortran/test_math_utilities.f90 b/test/fortran/test_math_utilities.f90 new file mode 100644 index 0000000..25064f5 --- /dev/null +++ b/test/fortran/test_math_utilities.f90 @@ -0,0 +1,270 @@ +! ICON +! +! --------------------------------------------------------------- +! Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +! Contact information: icon-model.org +! +! See AUTHORS.TXT for a list of authors +! See LICENSES/ for license information +! 
SPDX-License-Identifier: BSD-3-Clause +! --------------------------------------------------------------- + +MODULE TEST_mo_math_utilities + + USE FORTUTF + USE mo_math_types, ONLY: t_cartesian_coordinates, t_geographical_coordinates, & + & t_line, t_tangent_vectors + USE mo_math_constants, ONLY: pi, pi_2, pi_4 + USE mo_lib_grid_geometry_info + ! USE mo_physical_constants, ONLY: earth_radius + USE, INTRINSIC :: ISO_FORTRAN_ENV, ONLY: wp => real64 + + IMPLICIT NONE + + PRIVATE + + PUBLIC :: TEST_cc2gc, TEST_gc2cc, TEST_cc2tv, TEST_gvec2cvec, TEST_cvec2gvec, TEST_tdma_solver_vec + REAL(wp), PARAMETER :: earth_radius = 6.371229e6_wp !! [m] average radius + +CONTAINS + + SUBROUTINE TEST_cc2gc + + USE mo_math_utilities, ONLY: cc2gc + + TYPE(t_cartesian_coordinates) :: coord + TYPE(t_grid_geometry_info) :: geometry_info + TYPE(t_geographical_coordinates) :: pos + + REAL(wp) :: lon_ref, lat_ref + + coord%x(1) = 10.0_wp + coord%x(2) = 10.0_wp + coord%x(3) = 5.0_wp + + geometry_info%geometry_type = sphere_geometry + + pos = cc2gc(coord, geometry_info) + + lon_ref = 0.78539816339744828_wp + lat_ref = 0.33983690945412193_wp + + CALL TAG_TEST("TEST_cc2gc_sphere_lon") + CALL ASSERT_EQUAL(pos%lon, lon_ref) + CALL TAG_TEST("TEST_cc2gc_sphere_lat") + CALL ASSERT_EQUAL(pos%lat, lat_ref) + + geometry_info%geometry_type = planar_torus_geometry + + geometry_info%domain_length = 2.0_wp*pi*earth_radius + geometry_info%domain_height = 2.0_wp*pi*earth_radius + + coord%x(1) = 1000000.0_wp + coord%x(2) = 100.0_wp + + pos = cc2gc(coord, geometry_info) + + lon_ref = 0.15695558894524117_wp + lat_ref = -0.17453205322393880_wp + + CALL TAG_TEST("TEST_cc2gc_torus_lon") + CALL ASSERT_EQUAL(pos%lon, lon_ref) + CALL TAG_TEST("TEST_cc2gc_torus_lat") + CALL ASSERT_EQUAL(pos%lat, lat_ref) + + END SUBROUTINE TEST_cc2gc + + SUBROUTINE TEST_gc2cc + + USE mo_math_utilities, ONLY: gc2cc + + TYPE(t_cartesian_coordinates) :: coord, coord_ref + TYPE(t_grid_geometry_info) :: geometry_info + 
TYPE(t_geographical_coordinates) :: pos + + REAL(wp) :: lon_ref, lat_ref, tol + INTEGER :: i + CHARACTER(LEN=32) :: tag + + tol = 1d-15 + pos%lon = 0.78_wp + pos%lat = 0.34_wp + + geometry_info%geometry_type = sphere_geometry + + coord = gc2cc(pos, geometry_info) + + coord_ref%x(1) = 0.6702170547483377_wp + coord_ref%x(2) = 0.6630199536212522_wp + coord_ref%x(3) = 0.3334870921408144_wp + + DO i = 1, SIZE(coord%x) +! write(*,"(i4,a,f24.16)") i, ' coord%x(i): ', coord%x(i) + WRITE (tag, '(a,i1)') "TEST_gc2cc_sphere_", i + CALL TAG_TEST(tag) + CALL ASSERT_ALMOST_EQUAL(coord%x(i), coord_ref%x(i), tol) + END DO + + geometry_info%geometry_type = planar_torus_geometry + + geometry_info%domain_length = 2.0_wp*pi*earth_radius + geometry_info%domain_height = 2.0_wp*pi*earth_radius + + pos%lon = 0.15_wp + pos%lat = -0.17_wp + + coord = gc2cc(pos, geometry_info) + + coord_ref%x(1) = 955684.3499999998603016_wp + coord_ref%x(2) = 519845.4807382422150113_wp + coord_ref%x(3) = 0.0_wp + + DO i = 1, SIZE(coord%x) +! write(*,"(i4,a,f24.16)") i, ' coord%x(i): ', coord%x(i) + WRITE (tag, '(a,i1)') "TEST_gc2cc_torus_", i + CALL TAG_TEST(tag) + CALL ASSERT_EQUAL(coord%x(i), coord_ref%x(i)) + END DO + + END SUBROUTINE TEST_gc2cc + + SUBROUTINE TEST_cc2tv + USE mo_math_utilities, ONLY: cc2tv + + TYPE(t_cartesian_coordinates) :: coord + TYPE(t_geographical_coordinates) :: pos + TYPE(t_tangent_vectors):: tt + REAL(wp) :: v1_ref, v2_ref + + pos%lon = pi_2 + pos%lat = pi_4 + coord%x(1) = 10.0_wp + coord%x(2) = 15.0_wp + coord%x(3) = 5.0_wp + + tt = cc2tv(coord, pos) + + v1_ref = -9.9999999999999982_wp + v2_ref = -7.0710678118654737_wp + + CALL TAG_TEST("TEST_cc2tv_v1") + CALL ASSERT_EQUAL(tt%v1, v1_ref) + CALL TAG_TEST("TEST_cc2tv_v2") + CALL ASSERT_EQUAL(tt%v2, v2_ref) + + END SUBROUTINE TEST_cc2tv + + SUBROUTINE TEST_gvec2cvec + USE mo_math_utilities, ONLY: gvec2cvec + + REAL(wp) :: p_gu, p_gv ! zonal and meridional vec. component + REAL(wp) :: p_long, p_lat ! geo. coord. 
of data point + TYPE(t_grid_geometry_info) :: geometry_info + + REAL(wp) :: p_cu, p_cv, p_cw ! Cart. vector + REAL(wp) :: p_cu_ref, p_cv_ref, p_cw_ref ! Cart. vector ref + + geometry_info%geometry_type = sphere_geometry + + p_gu = 10.0_wp + p_gv = 5.0_wp + + p_long = pi_2 + p_lat = pi_4 + + CALL gvec2cvec(p_gu, p_gv, p_long, p_lat, p_cu, p_cv, p_cw, geometry_info) + + p_cu_ref = -10.0_wp + p_cv_ref = -3.5355339059327369_wp + p_cw_ref = 3.5355339059327378_wp + + CALL TAG_TEST("TEST_gvec2cvec_sphere_cu") + CALL ASSERT_EQUAL(p_cu, p_cu_ref) + CALL TAG_TEST("TEST_gvec2cvec_sphere_cv") + CALL ASSERT_EQUAL(p_cv, p_cv_ref) + CALL TAG_TEST("TEST_gvec2cvec_sphere_cw") + CALL ASSERT_EQUAL(p_cw, p_cw_ref) + + geometry_info%geometry_type = planar_torus_geometry + + CALL gvec2cvec(p_gu, p_gv, p_long, p_lat, p_cu, p_cv, p_cw, geometry_info) + + CALL TAG_TEST("TEST_gvec2cvec_torus_cu") + CALL ASSERT_EQUAL(p_cu, p_gu) + CALL TAG_TEST("TEST_gvec2cvec_torus_cv") + CALL ASSERT_EQUAL(p_cv, p_gv) + CALL TAG_TEST("TEST_gvec2cvec_torus_cw") + CALL ASSERT_EQUAL(p_cw, 0.0_wp) + + END SUBROUTINE TEST_gvec2cvec + + SUBROUTINE TEST_cvec2gvec + USE mo_math_utilities, ONLY: cvec2gvec + + REAL(wp) :: p_cu, p_cv, p_cw ! Cart. vector + REAL(wp) :: p_long, p_lat ! geo. coord. of data point + TYPE(t_grid_geometry_info) :: geometry_info + + REAL(wp) :: p_gu, p_gv ! zonal and meridional vec. comp. + REAL(wp) :: p_gu_ref, p_gv_ref + + geometry_info%geometry_type = sphere_geometry + + p_cu = -10.0_wp + p_cv = -3.0_wp + p_cw = 3.0_wp + + p_long = pi_2 + p_lat = pi_4 + + CALL cvec2gvec(p_cu, p_cv, p_cw, p_long, p_lat, p_gu, p_gv, geometry_info) + +! write(*,"(a,f24.16)") ' p_gu: ', p_gu +! 
write(*,"(a,f24.16)") ' p_gv: ', p_gv + + p_gu_ref = 10.0_wp + p_gv_ref = 4.2426406871192857_wp + + CALL TAG_TEST("TEST_cvec2gvec_sphere_gu") + CALL ASSERT_EQUAL(p_gu, p_gu_ref) + CALL TAG_TEST("TEST_cvec2gvec_sphere_gv") + CALL ASSERT_EQUAL(p_gv, p_gv_ref) + + geometry_info%geometry_type = planar_torus_geometry + + CALL cvec2gvec(p_cu, p_cv, p_cw, p_long, p_lat, p_gu, p_gv, geometry_info) + + CALL TAG_TEST("TEST_cvec2gvec_torus_gu") + CALL ASSERT_EQUAL(p_gu, p_cu) + CALL TAG_TEST("TEST_cvec2gvec_torus_gv") + CALL ASSERT_EQUAL(p_gv, p_cv) + + END SUBROUTINE TEST_cvec2gvec + + SUBROUTINE TEST_tdma_solver_vec + + USE mo_math_utilities, ONLY: tdma_solver_vec + + INTEGER, PARAMETER :: n = 10 + REAL(wp) :: a(n, n), b(n, n), c(n, n), d(n, n), x(n, n) + INTEGER :: i, j + REAL(wp) :: sum, sum_ref + DO i = 1, n + DO j = 1, n + a(i, j) = 1.0_wp + b(i, j) = 2.0_wp + c(i, j) = 1.0_wp + d(i, j) = 1.0_wp + END DO + END DO + CALL tdma_solver_vec(a, b, c, d, 1, n, 1, n, x) + sum = 0.0_wp + DO i = 1, n + sum = sum + x(i, 1) + END DO + sum_ref = 4.5454545454545467_wp + CALL TAG_TEST("TEST_tdma_solver_vec") + CALL ASSERT_EQUAL(sum, sum_ref) + END SUBROUTINE TEST_tdma_solver_vec + +END MODULE TEST_mo_math_utilities -- GitLab From 94d58662868f855ad7c152dd30c3be6af4e6de35 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 31 Dec 2024 08:54:28 +0100 Subject: [PATCH 03/76] updated the gitignore file --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 37de517..18c1a34 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,5 @@ iconmath_Tests # Test stage files: /**/Testing/* run_tests.f90 +iconmath_Tests +run_tests.f90 -- GitLab From 4b8bfcaad0d84b86ec207dad798c339d3cb16263 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 21 Feb 2025 10:04:20 +0100 Subject: [PATCH 04/76] fixed a style-check issue --- test/fortran/test_math_utilities.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/test/fortran/test_math_utilities.f90 b/test/fortran/test_math_utilities.f90 index 25064f5..70fd9ab 100644 --- a/test/fortran/test_math_utilities.f90 +++ b/test/fortran/test_math_utilities.f90 @@ -24,7 +24,7 @@ MODULE TEST_mo_math_utilities PRIVATE PUBLIC :: TEST_cc2gc, TEST_gc2cc, TEST_cc2tv, TEST_gvec2cvec, TEST_cvec2gvec, TEST_tdma_solver_vec - REAL(wp), PARAMETER :: earth_radius = 6.371229e6_wp !! [m] average radius + REAL(wp), PARAMETER :: earth_radius = 6.371229e6_wp !! [m] average radius CONTAINS -- GitLab From bbee0c88db2cb1cd217bb3dea40330acf7d0f786 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 31 Dec 2024 08:54:28 +0100 Subject: [PATCH 05/76] updated the gitignore file --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 18c1a34..23535bf 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,5 @@ iconmath_Tests # Test stage files: /**/Testing/* -run_tests.f90 iconmath_Tests run_tests.f90 -- GitLab From 6c0ce7e82ef034ca74a620b38e0b35173c3855d1 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 31 Dec 2024 14:57:31 +0100 Subject: [PATCH 06/76] changed the extension of mo_lib_loopindices to F90 --- src/support/CMakeLists.txt | 2 +- src/support/{mo_lib_loopindices.f90 => mo_lib_loopindices.F90} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/support/{mo_lib_loopindices.f90 => mo_lib_loopindices.F90} (100%) diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index 8e5fcc4..0bce304 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -13,7 +13,7 @@ add_library( iconmath-support mo_gridman_constants.f90 mo_lib_grid_geometry_info.f90 - mo_lib_loopindices.f90 + mo_lib_loopindices.F90 mo_math_constants.f90 mo_math_types.f90 mo_math_utilities.F90 diff --git a/src/support/mo_lib_loopindices.f90 b/src/support/mo_lib_loopindices.F90 similarity index 100% rename from src/support/mo_lib_loopindices.f90 rename to 
src/support/mo_lib_loopindices.F90 -- GitLab From 0a4a1ee48fb1d328d3698f5bcb1e2d8c73be319e Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 31 Dec 2024 14:58:40 +0100 Subject: [PATCH 07/76] added the cpp version of mo_lib_loopindices and compiled the code made changes to debug the last version --- CMakeLists.txt | 2 +- src/support/CMakeLists.txt | 12 +++++++- src/support/mo_lib_loopindices.cpp | 46 ++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 src/support/mo_lib_loopindices.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index daafc5d..c40cd40 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,7 +14,7 @@ cmake_minimum_required(VERSION 3.18) project( iconmath VERSION 1.0.0 - LANGUAGES Fortran) + LANGUAGES Fortran CXX) option(BUILD_SHARED_LIBS "Build shared libraries" ON) option(BUILD_TESTING "Build tests" ON) diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index 0bce304..6d2fd78 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -13,6 +13,7 @@ add_library( iconmath-support mo_gridman_constants.f90 mo_lib_grid_geometry_info.f90 + mo_lib_loopindices.cpp mo_lib_loopindices.F90 mo_math_constants.f90 mo_math_types.f90 @@ -57,7 +58,16 @@ target_include_directories( # Path to the Fortran modules: $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:${Fortran_MODULE_DIRECTORY}>> $<INSTALL_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:$<INSTALL_PREFIX>/${CMAKE_INSTALL_INCLUDEDIR}>> -) + INTERFACE + # Path to the internal C/C++ headers (for testing): Requires CMake 3.15+ for + # multiple compile languages + # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html + $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_SOURCE_DIR}>> + PRIVATE + # Path to config.h (for C and C++ only): Requires CMake 3.15+ for multiple + # compile languages + # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html + 
$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_BINARY_DIR}>>) target_link_libraries(iconmath-support PUBLIC fortran-support::fortran-support) diff --git a/src/support/mo_lib_loopindices.cpp b/src/support/mo_lib_loopindices.cpp new file mode 100644 index 0000000..9810427 --- /dev/null +++ b/src/support/mo_lib_loopindices.cpp @@ -0,0 +1,46 @@ +#include <algorithm> // For std::max + +extern "C" { + // get_indices_c_lib function + void get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out) { + if (i_blk == i_startblk) { + i_startidx_out = std::max(1, i_startidx_in); + i_endidx_out = nproma; + if (i_blk == i_endblk) { + i_endidx_out = i_endidx_in; + } + } else if (i_blk == i_endblk) { + i_startidx_out = 1; + i_endidx_out = i_endidx_in; + } else { + i_startidx_out = 1; + i_endidx_out = nproma; + } + } + + // get_indices_e_lib function + void get_indices_e_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out) { + i_startidx_out = (i_blk != i_startblk) ? 1 : std::max(1, i_startidx_in); + i_endidx_out = (i_blk != i_endblk) ? 
nproma : i_endidx_in; + } + + // get_indices_v_lib function + void get_indices_v_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out) { + if (i_blk == i_startblk) { + i_startidx_out = i_startidx_in; + i_endidx_out = nproma; + if (i_blk == i_endblk) { + i_endidx_out = i_endidx_in; + } + } else if (i_blk == i_endblk) { + i_startidx_out = 1; + i_endidx_out = i_endidx_in; + } else { + i_startidx_out = 1; + i_endidx_out = nproma; + } + } +} -- GitLab From ffca8a6d89d5e38cb4f28e4a5423c21145fed53f Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 31 Dec 2024 15:07:21 +0100 Subject: [PATCH 08/76] enabled the use of cpp bindings for mo_lib_loopindices --- CMakeLists.txt | 1 + src/support/CMakeLists.txt | 4 ++++ src/support/mo_lib_loopindices.F90 | 34 +++++++++++++++++++++++++++++- 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c40cd40..2f32fcf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,6 +23,7 @@ option(BUILD_ICONMATH_HORIZONTAL "Build horizontal library" ON) option(IM_ENABLE_MIXED_PRECISION "Enable mixed precision" OFF) option(IM_ENABLE_LOOP_EXCHANGE "Enable loop exchange" OFF) +option(IM_USE_CPP_BINDINGS "Use C++ bindings" OFF) option(IM_ENABLE_DIM_SWAP "Enable dimension swap" OFF) option(IM_ENABLE_OPENACC "Enable OpenACC support" OFF) option(IM_ENABLE_OPENMP "Enable OpenMP support" OFF) diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index 6d2fd78..c0fc287 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -41,6 +41,10 @@ if(IM_ENABLE_DIM_SWAP) target_compile_definitions(iconmath-support PRIVATE __SWAPDIM) endif() +if(IM_USE_CPP_BINDINGS) + target_compile_definitions(iconmath-support PRIVATE __USE_CPP_BINDINGS) +endif() + if(IM_ENABLE_OPENACC) # If _OPENACC is defined, assume that the required compiler flags are already # provided, e.g. 
in CMAKE_Fortran_FLAGS: diff --git a/src/support/mo_lib_loopindices.F90 b/src/support/mo_lib_loopindices.F90 index fe6c9b9..3ac80dd 100644 --- a/src/support/mo_lib_loopindices.F90 +++ b/src/support/mo_lib_loopindices.F90 @@ -16,12 +16,18 @@ MODULE mo_lib_loopindices +#ifdef __USE_CPP_BINDINGS + USE, INTRINSIC :: ISO_C_BINDING +#endif + IMPLICIT NONE PRIVATE PUBLIC :: get_indices_c_lib, get_indices_e_lib, get_indices_v_lib +#ifndef __USE_CPP_BINDINGS + CONTAINS !------------------------------------------------------------------------- @@ -121,5 +127,31 @@ CONTAINS END SUBROUTINE get_indices_v_lib -END MODULE mo_lib_loopindices +#else + + INTERFACE + SUBROUTINE get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_c_lib") + IMPORT :: C_INT + INTEGER(C_INT), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk + INTEGER(C_INT) :: i_startidx_out, i_endidx_out + END SUBROUTINE get_indices_c_lib + + SUBROUTINE get_indices_e_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_e_lib") + IMPORT :: C_INT + INTEGER(C_INT), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk + INTEGER(C_INT) :: i_startidx_out, i_endidx_out + END SUBROUTINE get_indices_e_lib + + SUBROUTINE get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_v_lib") + IMPORT :: C_INT + INTEGER(C_INT), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk + INTEGER(C_INT) :: i_startidx_out, i_endidx_out + END SUBROUTINE get_indices_v_lib + END INTERFACE + +#endif +END MODULE mo_lib_loopindices -- GitLab From f6ac82117b5ab528b1bebfb8d843b8fb371d9cbd Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Thu, 2 Jan 2025 22:24:47 +0100 Subject: [PATCH 09/76] added the cpp version of 
tdma_solver_vec made it compile --- CMakeLists.txt | 2 + src/support/CMakeLists.txt | 1 + src/support/mo_math_utilities.F90 | 164 +++++++++++++++++------------- src/support/mo_math_utilities.cpp | 77 ++++++++++++++ 4 files changed, 172 insertions(+), 72 deletions(-) create mode 100644 src/support/mo_math_utilities.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 2f32fcf..8fb4acf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,8 @@ project( VERSION 1.0.0 LANGUAGES Fortran CXX) +set(CMAKE_CXX_STANDARD 17) + option(BUILD_SHARED_LIBS "Build shared libraries" ON) option(BUILD_TESTING "Build tests" ON) option(BUILD_ICONMATH_INTERPOLATION "Build interpolation library" ON) diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index c0fc287..35e1c71 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -17,6 +17,7 @@ add_library( mo_lib_loopindices.F90 mo_math_constants.f90 mo_math_types.f90 + mo_math_utilities.cpp mo_math_utilities.F90 mo_random_number_generators.F90) diff --git a/src/support/mo_math_utilities.F90 b/src/support/mo_math_utilities.F90 index 0add18c..5c00192 100644 --- a/src/support/mo_math_utilities.F90 +++ b/src/support/mo_math_utilities.F90 @@ -22,6 +22,7 @@ ! #endif MODULE mo_math_utilities + USE, INTRINSIC :: ISO_C_BINDING USE mo_iconlib_kind, ONLY: wp, dp, sp USE mo_math_constants, ONLY: pi, pi_2, dbl_eps USE mo_gridman_constants, ONLY: SUCCESS, TORUS_MAX_LAT @@ -165,7 +166,98 @@ MODULE mo_math_utilities CHARACTER(LEN=*), PARAMETER :: modname = 'mo_math_utilities' + !------------------------------------------------------------------------- + !> + !! TDMA tridiagonal matrix solver for a_i*x_(i-1) + b_i*x_i + c_i*x_(i+1) = d_i + !! + !! a - sub-diagonal (means it is the diagonal below the main diagonal) + !! b - the main diagonal + !! c - sup-diagonal (means it is the diagonal above the main diagonal) + !! d - right part + !! varout - the answer (identical to x in description above) + !! 
slev - start level (top) + !! elev - end level (bottom) + +! Preprocessor directive to conditionally include the tdma_solver_vec implementation +#ifndef __USE_CPP_BINDINGS + + CONTAINS + + SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, varout, opt_acc_queue) + INTEGER, INTENT(IN) :: slev, elev + INTEGER, INTENT(IN) :: startidx, endidx + REAL(wp), INTENT(IN) :: a(:, :), b(:, :), c(:, :), d(:, :) + REAL(wp), INTENT(OUT) :: varout(:, :) + INTEGER, OPTIONAL, INTENT(IN) :: opt_acc_queue + + ! + ! local + REAL(wp):: m, c_p(SIZE(a, 1), SIZE(a, 2)), d_p(SIZE(a, 1), SIZE(a, 2)) + INTEGER :: i + INTEGER :: jc + INTEGER :: acc_queue + + IF (PRESENT(opt_acc_queue)) THEN + acc_queue = opt_acc_queue + ELSE + acc_queue = 1 + END IF + + ! initialize c-prime and d-prime + !$ACC PARALLEL DEFAULT(PRESENT) CREATE(c_p, d_p) ASYNC(acc_queue) + !$ACC LOOP GANG(STATIC: 1) VECTOR + DO jc = startidx, endidx + c_p(jc, slev) = c(jc, slev)/b(jc, slev) + d_p(jc, slev) = d(jc, slev)/b(jc, slev) + END DO + ! solve for vectors c-prime and d-prime + !$ACC LOOP SEQ +!NEC$ outerloop_unroll(4) + DO i = slev + 1, elev + !$ACC LOOP GANG(STATIC: 1) VECTOR PRIVATE(m) + DO jc = startidx, endidx + m = 1._wp/(b(jc, i) - c_p(jc, i - 1)*a(jc, i)) + c_p(jc, i) = c(jc, i)*m + d_p(jc, i) = (d(jc, i) - d_p(jc, i - 1)*a(jc, i))*m + END DO + END DO + ! initialize varout + !$ACC LOOP GANG(STATIC: 1) VECTOR + DO jc = startidx, endidx + varout(jc, elev) = d_p(jc, elev) + END DO + ! solve for varout from the vectors c-prime and d-prime + !$ACC LOOP SEQ +!NEC$ outerloop_unroll(4) + DO i = elev - 1, slev, -1 + !$ACC LOOP GANG(STATIC: 1) VECTOR + DO jc = startidx, endidx + varout(jc, i) = d_p(jc, i) - c_p(jc, i)*varout(jc, i + 1) + END DO + END DO + !$ACC END PARALLEL + + IF (.NOT. PRESENT(opt_acc_queue)) THEN + !$ACC WAIT(acc_queue) + END IF + + END SUBROUTINE tdma_solver_vec + +#else + + ! 
C++ binding for tdma_solver_vec + INTERFACE + SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, varout, opt_acc_queue) BIND(C, NAME="tdma_solver_vec") + IMPORT :: C_DOUBLE, C_INT + REAL(C_DOUBLE), INTENT(IN) :: a(*), b(*), c(*), d(*) + INTEGER(C_INT), VALUE :: slev, elev, startidx, endidx + REAL(C_DOUBLE), INTENT(OUT) :: varout(*) + INTEGER(C_INT), OPTIONAL :: opt_acc_queue + END SUBROUTINE tdma_solver_vec + END INTERFACE + CONTAINS +#endif !------------------------------------------------------------------------- ! Variant for double-precision (or working-precision=dp) lon+lat in ICON @@ -2041,78 +2133,6 @@ CONTAINS END SUBROUTINE tdma_solver - !------------------------------------------------------------------------- - !> - !! TDMA tridiagonal matrix solver for a_i*x_(i-1) + b_i*x_i + c_i*x_(i+1) = d_i - !! - !! a - sub-diagonal (means it is the diagonal below the main diagonal) - !! b - the main diagonal - !! c - sup-diagonal (means it is the diagonal above the main diagonal) - !! d - right part - !! varout - the answer (identical to x in description above) - !! slev - start level (top) - !! elev - end level (bottom) - SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, varout, opt_acc_queue) - INTEGER, INTENT(IN) :: slev, elev - INTEGER, INTENT(IN) :: startidx, endidx - REAL(wp), INTENT(IN) :: a(:, :), b(:, :), c(:, :), d(:, :) - REAL(wp), INTENT(OUT) :: varout(:, :) - INTEGER, OPTIONAL, INTENT(IN) :: opt_acc_queue - - ! - ! local - REAL(wp):: m, c_p(SIZE(a, 1), SIZE(a, 2)), d_p(SIZE(a, 1), SIZE(a, 2)) - INTEGER :: i - INTEGER :: jc - INTEGER :: acc_queue - - IF (PRESENT(opt_acc_queue)) THEN - acc_queue = opt_acc_queue - ELSE - acc_queue = 1 - END IF - - ! initialize c-prime and d-prime - !$ACC PARALLEL DEFAULT(PRESENT) CREATE(c_p, d_p) ASYNC(acc_queue) - !$ACC LOOP GANG(STATIC: 1) VECTOR - DO jc = startidx, endidx - c_p(jc, slev) = c(jc, slev)/b(jc, slev) - d_p(jc, slev) = d(jc, slev)/b(jc, slev) - END DO - ! 
solve for vectors c-prime and d-prime - !$ACC LOOP SEQ -!NEC$ outerloop_unroll(4) - DO i = slev + 1, elev - !$ACC LOOP GANG(STATIC: 1) VECTOR PRIVATE(m) - DO jc = startidx, endidx - m = 1._wp/(b(jc, i) - c_p(jc, i - 1)*a(jc, i)) - c_p(jc, i) = c(jc, i)*m - d_p(jc, i) = (d(jc, i) - d_p(jc, i - 1)*a(jc, i))*m - END DO - END DO - ! initialize varout - !$ACC LOOP GANG(STATIC: 1) VECTOR - DO jc = startidx, endidx - varout(jc, elev) = d_p(jc, elev) - END DO - ! solve for varout from the vectors c-prime and d-prime - !$ACC LOOP SEQ -!NEC$ outerloop_unroll(4) - DO i = elev - 1, slev, -1 - !$ACC LOOP GANG(STATIC: 1) VECTOR - DO jc = startidx, endidx - varout(jc, i) = d_p(jc, i) - c_p(jc, i)*varout(jc, i + 1) - END DO - END DO - !$ACC END PARALLEL - - IF (.NOT. PRESENT(opt_acc_queue)) THEN - !$ACC WAIT(acc_queue) - END IF - - END SUBROUTINE tdma_solver_vec - !------------------------------------------------------------------------- - !------------------------------------------------------------------------- ! !> Helper functions for computing the vertical layer structure diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp new file mode 100644 index 0000000..a8ccce4 --- /dev/null +++ b/src/support/mo_math_utilities.cpp @@ -0,0 +1,77 @@ +#include <vector> +#include <iostream> +#include <chrono> // For timing + +extern "C" { + +void tdma_solver_vec(double *a, double *b, double *c, double *d, + int slev, int elev, int startidx, int endidx, + double* varout, int opt_acc_queue = -1) { + + int acc_queue = (opt_acc_queue == -1) ? 
1 : opt_acc_queue; // Use 1 as the default if opt_acc_queue is not provided + + // Determine array sizes based on startidx and endidx + int nrows = endidx - startidx; + int ncols = elev - slev; + + // Temporary arrays for c-prime and d-prime + std::vector<double> cp(nrows * ncols, 0.0); + std::vector<double> dp(nrows * ncols, 0.0); + + // Helper function to access 2D arrays stored as 1D + auto idx = [&](int row, int col) { return col * nrows + row; }; + + // Start timing + auto start_time = std::chrono::high_resolution_clock::now(); + + // OpenACC Parallel Region + #pragma acc parallel default(present) create(cp[:nrows*ncols], dp[:nrows*ncols]) async(acc_queue) + { + // Initialize c-prime and d-prime + #pragma acc loop gang(static: 1) vector + for (int jc = startidx; jc < endidx; ++jc) { + cp[idx(jc, slev)] = c[idx(jc, slev)] / b[idx(jc, slev)]; + dp[idx(jc, slev)] = d[idx(jc, slev)] / b[idx(jc, slev)]; + } + + // Solve for vectors c-prime and d-prime + #pragma acc loop seq + for (int i = slev + 1; i < elev; ++i) { + #pragma acc loop gang(static: 1) vector + for (int jc = startidx; jc < endidx; ++jc) { + double m = 1.0 / (b[idx(jc, i)] - cp[idx(jc, i - 1)] * a[idx(jc, i)]); + cp[idx(jc, i)] = c[idx(jc, i)] * m; + dp[idx(jc, i)] = (d[idx(jc, i)] - dp[idx(jc, i - 1)] * a[idx(jc, i)]) * m; + } + } + + // Initialize varout + #pragma acc loop gang(static: 1) vector + for (int jc = startidx; jc < endidx; ++jc) { + varout[idx(jc, elev-1)] = dp[idx(jc, elev-1)]; + } + + // Solve for varout from the vectors c-prime and d-prime + #pragma acc loop seq + for (int i = elev - 2; i >= slev; --i) { + #pragma acc loop gang(static: 1) vector + for (int jc = startidx; jc < endidx; ++jc) { + varout[idx(jc, i)] = dp[idx(jc, i)] - cp[idx(jc, i)] * varout[idx(jc, i + 1)]; + } + } + } + + printf("tdma_solver_vec: completed using C++\n"); + + // Wait for OpenACC asynchronous operations to complete if acc_queue is not optional + if (opt_acc_queue == -1) { + #pragma acc wait(acc_queue) + } + 
+ // End timing + auto end_time = std::chrono::high_resolution_clock::now(); + std::chrono::duration<double> elapsed_time = end_time - start_time; + + std::cout << "Elapsed time for tdma_solver_vec (C++): " << elapsed_time.count() << " seconds" << std::endl; +} +} -- GitLab From d5f6519db2f64ad83de9f3820bd91ce95f48a231 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Thu, 2 Jan 2025 22:29:20 +0100 Subject: [PATCH 10/76] added the test for the cpp binding of tdma_solver_vec fixed a bug in testing --- src/support/CMakeLists.txt | 2 +- test/fortran/test_math_utilities.f90 | 29 ++++++++++++++++++++++++---- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index 35e1c71..7af714e 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -43,7 +43,7 @@ if(IM_ENABLE_DIM_SWAP) endif() if(IM_USE_CPP_BINDINGS) - target_compile_definitions(iconmath-support PRIVATE __USE_CPP_BINDINGS) + target_compile_definitions(iconmath-support PUBLIC __USE_CPP_BINDINGS) endif() if(IM_ENABLE_OPENACC) diff --git a/test/fortran/test_math_utilities.f90 b/test/fortran/test_math_utilities.f90 index 70fd9ab..8dcf5b3 100644 --- a/test/fortran/test_math_utilities.f90 +++ b/test/fortran/test_math_utilities.f90 @@ -248,7 +248,9 @@ CONTAINS INTEGER, PARAMETER :: n = 10 REAL(wp) :: a(n, n), b(n, n), c(n, n), d(n, n), x(n, n) INTEGER :: i, j - REAL(wp) :: sum, sum_ref + REAL(wp) :: sum, sum_ref, tol + REAL(wp) :: start_time, end_time, elapsed_time + DO i = 1, n DO j = 1, n a(i, j) = 1.0_wp @@ -257,14 +259,33 @@ CONTAINS d(i, j) = 1.0_wp END DO END DO + + CALL CPU_TIME(start_time) +#ifndef __USE_CPP_BINDINGS CALL tdma_solver_vec(a, b, c, d, 1, n, 1, n, x) +#else + CALL tdma_solver_vec(a, b, c, d, 0, n, 0, n, x, -1) +#endif + CALL CPU_TIME(end_time) + + ! Compute elapsed time + elapsed_time = end_time - start_time + + ! 
Output timing result + write(*,*) "Elapsed time for tdma_solver_vec: ", elapsed_time, " seconds" + sum = 0.0_wp DO i = 1, n - sum = sum + x(i, 1) + DO j = 1, n + sum = sum + x(i, j) + ! write(*,"(a,f24.16)") ' x(i, 1): ', x(i, 1) + END DO END DO - sum_ref = 4.5454545454545467_wp + sum_ref = 27.2727272727272769_wp + tol = 1d-15 CALL TAG_TEST("TEST_tdma_solver_vec") - CALL ASSERT_EQUAL(sum, sum_ref) + CALL ASSERT_ALMOST_EQUAL(sum, sum_ref, tol) + END SUBROUTINE TEST_tdma_solver_vec END MODULE TEST_mo_math_utilities -- GitLab From 5ee6fbde578de94ace5a671b3fe0dddb68ea7b85 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 3 Jan 2025 09:31:02 +0100 Subject: [PATCH 11/76] changed the way local arrays are defined, it improves the performance fixed a bug --- src/support/mo_math_utilities.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp index a8ccce4..f82cd27 100644 --- a/src/support/mo_math_utilities.cpp +++ b/src/support/mo_math_utilities.cpp @@ -14,9 +14,8 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, int nrows = endidx - startidx; int ncols = elev - slev; - // Temporary arrays for c-prime and d-prime - std::vector<double> cp(nrows * ncols, 0.0); - std::vector<double> dp(nrows * ncols, 0.0); + double* cp = new double[nrows * ncols]; + double* dp = new double[nrows * ncols]; // Helper function to access 2D arrays stored as 1D auto idx = [&](int row, int col) { return col * nrows + row; }; @@ -68,6 +67,9 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, #pragma acc wait(acc_queue) } + // Free memory at the end + delete[] cp; + delete[] dp; // End timing auto end_time = std::chrono::high_resolution_clock::now(); std::chrono::duration<double> elapsed_time = end_time - start_time; -- GitLab From 6416163ab3904752c0e7364e41046a285a2fb4a1 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 3 
Jan 2025 09:34:24 +0100 Subject: [PATCH 12/76] replaced the lambda function for calculating combined index with a macro function fixed a bug --- src/support/mo_math_utilities.cpp | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp index f82cd27..c2b46dc 100644 --- a/src/support/mo_math_utilities.cpp +++ b/src/support/mo_math_utilities.cpp @@ -17,11 +17,7 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, double* cp = new double[nrows * ncols]; double* dp = new double[nrows * ncols]; - // Helper function to access 2D arrays stored as 1D - auto idx = [&](int row, int col) { return col * nrows + row; }; - - // Start timing - auto start_time = std::chrono::high_resolution_clock::now(); + #define IDX(row, col) ((col) * nrows + (row)) // performs better than lambda function // OpenACC Parallel Region #pragma acc parallel default(present) create(cp[:nrows*ncols], dp[:nrows*ncols]) async(acc_queue) @@ -29,8 +25,8 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, // Initialize c-prime and d-prime #pragma acc loop gang(static: 1) vector for (int jc = startidx; jc < endidx; ++jc) { - cp[idx(jc, slev)] = c[idx(jc, slev)] / b[idx(jc, slev)]; - dp[idx(jc, slev)] = d[idx(jc, slev)] / b[idx(jc, slev)]; + cp[IDX(jc, slev)] = c[IDX(jc, slev)] / b[IDX(jc, slev)]; + dp[IDX(jc, slev)] = d[IDX(jc, slev)] / b[IDX(jc, slev)]; } // Solve for vectors c-prime and d-prime @@ -38,16 +34,16 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, for (int i = slev + 1; i < elev; ++i) { #pragma acc loop gang(static: 1) vector for (int jc = startidx; jc < endidx; ++jc) { - double m = 1.0 / (b[idx(jc, i)] - cp[idx(jc, i - 1)] * a[idx(jc, i)]); - cp[idx(jc, i)] = c[idx(jc, i)] * m; - dp[idx(jc, i)] = (d[idx(jc, i)] - dp[idx(jc, i - 1)] * a[idx(jc, i)]) * m; + double m = 1.0 / (b[IDX(jc, i)] - cp[IDX(jc, i - 1)] * a[IDX(jc, i)]); + cp[IDX(jc, i)] = 
c[IDX(jc, i)] * m; + dp[IDX(jc, i)] = (d[IDX(jc, i)] - dp[IDX(jc, i - 1)] * a[IDX(jc, i)]) * m; } } // Initialize varout #pragma acc loop gang(static: 1) vector for (int jc = startidx; jc < endidx; ++jc) { - varout[idx(jc, elev-1)] = dp[idx(jc, elev-1)]; + varout[IDX(jc, elev-1)] = dp[IDX(jc, elev-1)]; } // Solve for varout from the vectors c-prime and d-prime @@ -55,7 +51,7 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, for (int i = elev - 2; i >= slev; --i) { #pragma acc loop gang(static: 1) vector for (int jc = startidx; jc < endidx; ++jc) { - varout[idx(jc, i)] = dp[idx(jc, i)] - cp[idx(jc, i)] * varout[idx(jc, i + 1)]; + varout[IDX(jc, i)] = dp[IDX(jc, i)] - cp[IDX(jc, i)] * varout[IDX(jc, i + 1)]; } } } @@ -70,6 +66,7 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, // Free memory at the end delete[] cp; delete[] dp; + // End timing auto end_time = std::chrono::high_resolution_clock::now(); std::chrono::duration<double> elapsed_time = end_time - start_time; -- GitLab From 359fd431d9709237e654b9dbb72450a4f1c18d07 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 3 Jan 2025 09:35:30 +0100 Subject: [PATCH 13/76] changed where the timer is started --- src/support/mo_math_utilities.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp index c2b46dc..45430b0 100644 --- a/src/support/mo_math_utilities.cpp +++ b/src/support/mo_math_utilities.cpp @@ -8,6 +8,9 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, int slev, int elev, int startidx, int endidx, double* varout, int opt_acc_queue = -1) { + // Start timing + auto start_time = std::chrono::high_resolution_clock::now(); + int acc_queue = (opt_acc_queue == -1) ? 
1 : opt_acc_queue; // Use 1 as the default if opt_acc_queue is not provided // Determine array sizes based on startidx and endidx -- GitLab From 28081276395b98cab705222bb01251cd09413842 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 3 Jan 2025 11:08:00 +0100 Subject: [PATCH 14/76] added nrows and ncols as arguments to the cpp routine of tdma_solver_vec --- src/support/mo_math_utilities.F90 | 4 ++-- src/support/mo_math_utilities.cpp | 6 +----- test/fortran/test_math_utilities.f90 | 2 +- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/support/mo_math_utilities.F90 b/src/support/mo_math_utilities.F90 index 5c00192..525d2a1 100644 --- a/src/support/mo_math_utilities.F90 +++ b/src/support/mo_math_utilities.F90 @@ -247,10 +247,10 @@ MODULE mo_math_utilities ! C++ binding for tdma_solver_vec INTERFACE - SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, varout, opt_acc_queue) BIND(C, NAME="tdma_solver_vec") + SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) BIND(C, NAME="tdma_solver_vec") IMPORT :: C_DOUBLE, C_INT REAL(C_DOUBLE), INTENT(IN) :: a(*), b(*), c(*), d(*) - INTEGER(C_INT), VALUE :: slev, elev, startidx, endidx + INTEGER(C_INT), VALUE :: slev, elev, startidx, endidx, nrows, ncols REAL(C_DOUBLE), INTENT(OUT) :: varout(*) INTEGER(C_INT), OPTIONAL :: opt_acc_queue END SUBROUTINE tdma_solver_vec diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp index 45430b0..ff94b89 100644 --- a/src/support/mo_math_utilities.cpp +++ b/src/support/mo_math_utilities.cpp @@ -6,17 +6,13 @@ extern "C" { void tdma_solver_vec(double *a, double *b, double *c, double *d, int slev, int elev, int startidx, int endidx, - double* varout, int opt_acc_queue = -1) { + int nrows, int ncols, double *varout, int opt_acc_queue = -1) { // Start timing auto start_time = std::chrono::high_resolution_clock::now(); int acc_queue = (opt_acc_queue == -1) 
? 1 : opt_acc_queue; // Use 1 as the default if opt_acc_queue is not provided - // Determine array sizes based on startidx and endidx - int nrows = endidx - startidx; - int ncols = elev - slev; - double* cp = new double[nrows * ncols]; double* dp = new double[nrows * ncols]; diff --git a/test/fortran/test_math_utilities.f90 b/test/fortran/test_math_utilities.f90 index 8dcf5b3..9f95ca7 100644 --- a/test/fortran/test_math_utilities.f90 +++ b/test/fortran/test_math_utilities.f90 @@ -264,7 +264,7 @@ CONTAINS #ifndef __USE_CPP_BINDINGS CALL tdma_solver_vec(a, b, c, d, 1, n, 1, n, x) #else - CALL tdma_solver_vec(a, b, c, d, 0, n, 0, n, x, -1) + CALL tdma_solver_vec(a, b, c, d, 0, n, 0, n, n, n, x, -1) #endif CALL CPU_TIME(end_time) -- GitLab From aa678645f5d4c9aa8ba31785e7190a9757df0943 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 3 Jan 2025 11:20:50 +0100 Subject: [PATCH 15/76] added an additional test for tdma_solver_vec --- test/fortran/test_math_utilities.f90 | 31 +++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/test/fortran/test_math_utilities.f90 b/test/fortran/test_math_utilities.f90 index 9f95ca7..db8b824 100644 --- a/test/fortran/test_math_utilities.f90 +++ b/test/fortran/test_math_utilities.f90 @@ -253,13 +253,15 @@ CONTAINS DO i = 1, n DO j = 1, n - a(i, j) = 1.0_wp - b(i, j) = 2.0_wp - c(i, j) = 1.0_wp - d(i, j) = 1.0_wp + a(i, j) = 1.0_wp*(i+j) + b(i, j) = 2.0_wp*(i+j) + c(i, j) = 1.0_wp*(i+j) + d(i, j) = 1.0_wp*(i+j) END DO END DO + tol = 1d-15 + CALL CPU_TIME(start_time) #ifndef __USE_CPP_BINDINGS CALL tdma_solver_vec(a, b, c, d, 1, n, 1, n, x) @@ -278,12 +280,27 @@ CONTAINS DO i = 1, n DO j = 1, n sum = sum + x(i, j) - ! 
write(*,"(a,f24.16)") ' x(i, 1): ', x(i, 1) END DO END DO sum_ref = 27.2727272727272769_wp - tol = 1d-15 - CALL TAG_TEST("TEST_tdma_solver_vec") + CALL TAG_TEST("TEST_tdma_solver_vec_full") + CALL ASSERT_ALMOST_EQUAL(sum, sum_ref, tol) + + x = 0.0_wp +#ifndef __USE_CPP_BINDINGS + CALL tdma_solver_vec(a, b, c, d, 2, n-1, 2, n-1, x) +#else + CALL tdma_solver_vec(a, b, c, d, 1, n-1, 1, n-1, n, n, x, -1) +#endif + sum = 0.0_wp + DO i = 2, n-1 + DO j = 2, n-1 + sum = sum + x(i, j) + END DO + END DO + sum_ref = 17.7777777777777679_wp + + CALL TAG_TEST("TEST_tdma_solver_vec_partial") CALL ASSERT_ALMOST_EQUAL(sum, sum_ref, tol) END SUBROUTINE TEST_tdma_solver_vec -- GitLab From 93e6ccb729e4ae4491142089d2543fc1994cd559 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 3 Jan 2025 11:44:43 +0100 Subject: [PATCH 16/76] updated the extension of test_math_utilities --- test/fortran/{test_math_utilities.f90 => test_math_utilities.F90} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/fortran/{test_math_utilities.f90 => test_math_utilities.F90} (100%) diff --git a/test/fortran/test_math_utilities.f90 b/test/fortran/test_math_utilities.F90 similarity index 100% rename from test/fortran/test_math_utilities.f90 rename to test/fortran/test_math_utilities.F90 -- GitLab From 4ae02053f5399fbf5f2e21f2328b463cf01e5d54 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 3 Jan 2025 11:58:39 +0100 Subject: [PATCH 17/76] fixed a style formatting issue --- src/support/mo_math_utilities.F90 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/support/mo_math_utilities.F90 b/src/support/mo_math_utilities.F90 index 525d2a1..6cafa98 100644 --- a/src/support/mo_math_utilities.F90 +++ b/src/support/mo_math_utilities.F90 @@ -247,7 +247,8 @@ MODULE mo_math_utilities ! 
C++ binding for tdma_solver_vec INTERFACE - SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) BIND(C, NAME="tdma_solver_vec") + SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) & + BIND(C, NAME="tdma_solver_vec") IMPORT :: C_DOUBLE, C_INT REAL(C_DOUBLE), INTENT(IN) :: a(*), b(*), c(*), d(*) INTEGER(C_INT), VALUE :: slev, elev, startidx, endidx, nrows, ncols -- GitLab From d3f283c78f0c517771647e2f045c7c2b896e490f Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 3 Jan 2025 12:06:42 +0100 Subject: [PATCH 18/76] fixed further style formatting issues --- src/support/mo_lib_loopindices.F90 | 18 +++++++++--------- src/support/mo_math_utilities.F90 | 16 ++++++++-------- test/fortran/test_math_utilities.F90 | 18 +++++++++--------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/src/support/mo_lib_loopindices.F90 b/src/support/mo_lib_loopindices.F90 index 3ac80dd..ce67af7 100644 --- a/src/support/mo_lib_loopindices.F90 +++ b/src/support/mo_lib_loopindices.F90 @@ -132,23 +132,23 @@ CONTAINS INTERFACE SUBROUTINE get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_c_lib") - IMPORT :: C_INT - INTEGER(C_INT), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk - INTEGER(C_INT) :: i_startidx_out, i_endidx_out + IMPORT :: c_int + INTEGER(c_int), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk + INTEGER(c_int) :: i_startidx_out, i_endidx_out END SUBROUTINE get_indices_c_lib SUBROUTINE get_indices_e_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_e_lib") - IMPORT :: C_INT - INTEGER(C_INT), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk - INTEGER(C_INT) :: i_startidx_out, i_endidx_out + IMPORT 
:: c_int + INTEGER(c_int), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk + INTEGER(c_int) :: i_startidx_out, i_endidx_out END SUBROUTINE get_indices_e_lib SUBROUTINE get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_v_lib") - IMPORT :: C_INT - INTEGER(C_INT), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk - INTEGER(C_INT) :: i_startidx_out, i_endidx_out + IMPORT :: c_int + INTEGER(c_int), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk + INTEGER(c_int) :: i_startidx_out, i_endidx_out END SUBROUTINE get_indices_v_lib END INTERFACE diff --git a/src/support/mo_math_utilities.F90 b/src/support/mo_math_utilities.F90 index 6cafa98..459ff76 100644 --- a/src/support/mo_math_utilities.F90 +++ b/src/support/mo_math_utilities.F90 @@ -181,7 +181,7 @@ MODULE mo_math_utilities ! Preprocessor directive to conditionally include the tdma_solver_vec implementation #ifndef __USE_CPP_BINDINGS - CONTAINS +CONTAINS SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, varout, opt_acc_queue) INTEGER, INTENT(IN) :: slev, elev @@ -247,13 +247,13 @@ MODULE mo_math_utilities ! 
C++ binding for tdma_solver_vec INTERFACE - SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) & - BIND(C, NAME="tdma_solver_vec") - IMPORT :: C_DOUBLE, C_INT - REAL(C_DOUBLE), INTENT(IN) :: a(*), b(*), c(*), d(*) - INTEGER(C_INT), VALUE :: slev, elev, startidx, endidx, nrows, ncols - REAL(C_DOUBLE), INTENT(OUT) :: varout(*) - INTEGER(C_INT), OPTIONAL :: opt_acc_queue + SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) & + BIND(C, NAME="tdma_solver_vec") + IMPORT :: c_double, c_int + REAL(c_double), INTENT(IN) :: a(*), b(*), c(*), d(*) + INTEGER(c_int), VALUE :: slev, elev, startidx, endidx, nrows, ncols + REAL(c_double), INTENT(OUT) :: varout(*) + INTEGER(c_int), OPTIONAL :: opt_acc_queue END SUBROUTINE tdma_solver_vec END INTERFACE diff --git a/test/fortran/test_math_utilities.F90 b/test/fortran/test_math_utilities.F90 index db8b824..07dcfb3 100644 --- a/test/fortran/test_math_utilities.F90 +++ b/test/fortran/test_math_utilities.F90 @@ -253,10 +253,10 @@ CONTAINS DO i = 1, n DO j = 1, n - a(i, j) = 1.0_wp*(i+j) - b(i, j) = 2.0_wp*(i+j) - c(i, j) = 1.0_wp*(i+j) - d(i, j) = 1.0_wp*(i+j) + a(i, j) = 1.0_wp*(i + j) + b(i, j) = 2.0_wp*(i + j) + c(i, j) = 1.0_wp*(i + j) + d(i, j) = 1.0_wp*(i + j) END DO END DO @@ -274,7 +274,7 @@ CONTAINS elapsed_time = end_time - start_time ! 
Output timing result - write(*,*) "Elapsed time for tdma_solver_vec: ", elapsed_time, " seconds" + WRITE (*, *) "Elapsed time for tdma_solver_vec: ", elapsed_time, " seconds" sum = 0.0_wp DO i = 1, n @@ -288,13 +288,13 @@ CONTAINS x = 0.0_wp #ifndef __USE_CPP_BINDINGS - CALL tdma_solver_vec(a, b, c, d, 2, n-1, 2, n-1, x) + CALL tdma_solver_vec(a, b, c, d, 2, n - 1, 2, n - 1, x) #else - CALL tdma_solver_vec(a, b, c, d, 1, n-1, 1, n-1, n, n, x, -1) + CALL tdma_solver_vec(a, b, c, d, 1, n - 1, 1, n - 1, n, n, x, -1) #endif sum = 0.0_wp - DO i = 2, n-1 - DO j = 2, n-1 + DO i = 2, n - 1 + DO j = 2, n - 1 sum = sum + x(i, j) END DO END DO -- GitLab From 520166f9c76798a206f2f4901588f922cbd57012 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 3 Jan 2025 12:10:24 +0100 Subject: [PATCH 19/76] added licences to the new cpp files --- src/support/mo_lib_loopindices.cpp | 11 +++++++++++ src/support/mo_math_utilities.cpp | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/src/support/mo_lib_loopindices.cpp b/src/support/mo_lib_loopindices.cpp index 9810427..e6d9d21 100644 --- a/src/support/mo_lib_loopindices.cpp +++ b/src/support/mo_lib_loopindices.cpp @@ -1,3 +1,14 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + #include <algorithm> // For std::max extern "C" { diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp index ff94b89..e171606 100644 --- a/src/support/mo_math_utilities.cpp +++ b/src/support/mo_math_utilities.cpp @@ -1,3 +1,14 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, 
ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + #include <vector> #include <iostream> #include <chrono> // For timing -- GitLab From d2ee94aa91ae6cc626420c60c0d9bb40827dac1d Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 21 Feb 2025 14:59:55 +0100 Subject: [PATCH 20/76] enabled compilation using Kokkos --- src/support/CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index 7af714e..b4ceb37 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -57,6 +57,9 @@ if(IM_ENABLE_OPENACC) endif() endif() +message(STATUS "iconmath-support enabling Kokkos") +find_package(Kokkos REQUIRED) + target_include_directories( iconmath-support PUBLIC @@ -74,7 +77,7 @@ target_include_directories( # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_BINARY_DIR}>>) -target_link_libraries(iconmath-support PUBLIC fortran-support::fortran-support) +target_link_libraries(iconmath-support PUBLIC fortran-support::fortran-support Kokkos::kokkos) install(TARGETS iconmath-support EXPORT "${PROJECT_NAME}-targets") -- GitLab From 64d639c41092588cc4eac6a985c2be1d62baa0ef Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 21 Feb 2025 15:00:45 +0100 Subject: [PATCH 21/76] converted the c++ code in mo_math_utilities to Kokkos --- src/support/mo_math_utilities.cpp | 128 ++++++++++++++---------------- 1 file changed, 61 insertions(+), 67 deletions(-) diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp index e171606..b3031a5 100644 --- a/src/support/mo_math_utilities.cpp +++ b/src/support/mo_math_utilities.cpp @@ -12,75 
+12,69 @@ #include <vector> #include <iostream> #include <chrono> // For timing +#include <Kokkos_Core.hpp> extern "C" { -void tdma_solver_vec(double *a, double *b, double *c, double *d, - int slev, int elev, int startidx, int endidx, - int nrows, int ncols, double *varout, int opt_acc_queue = -1) { - - // Start timing - auto start_time = std::chrono::high_resolution_clock::now(); - - int acc_queue = (opt_acc_queue == -1) ? 1 : opt_acc_queue; // Use 1 as the default if opt_acc_queue is not provided - - double* cp = new double[nrows * ncols]; - double* dp = new double[nrows * ncols]; - - #define IDX(row, col) ((col) * nrows + (row)) // performs better than lambda function - - // OpenACC Parallel Region - #pragma acc parallel default(present) create(cp[:nrows*ncols], dp[:nrows*ncols]) async(acc_queue) - { - // Initialize c-prime and d-prime - #pragma acc loop gang(static: 1) vector - for (int jc = startidx; jc < endidx; ++jc) { - cp[IDX(jc, slev)] = c[IDX(jc, slev)] / b[IDX(jc, slev)]; - dp[IDX(jc, slev)] = d[IDX(jc, slev)] / b[IDX(jc, slev)]; - } - - // Solve for vectors c-prime and d-prime - #pragma acc loop seq - for (int i = slev + 1; i < elev; ++i) { - #pragma acc loop gang(static: 1) vector - for (int jc = startidx; jc < endidx; ++jc) { - double m = 1.0 / (b[IDX(jc, i)] - cp[IDX(jc, i - 1)] * a[IDX(jc, i)]); - cp[IDX(jc, i)] = c[IDX(jc, i)] * m; - dp[IDX(jc, i)] = (d[IDX(jc, i)] - dp[IDX(jc, i - 1)] * a[IDX(jc, i)]) * m; - } - } - - // Initialize varout - #pragma acc loop gang(static: 1) vector - for (int jc = startidx; jc < endidx; ++jc) { - varout[IDX(jc, elev-1)] = dp[IDX(jc, elev-1)]; - } - - // Solve for varout from the vectors c-prime and d-prime - #pragma acc loop seq - for (int i = elev - 2; i >= slev; --i) { - #pragma acc loop gang(static: 1) vector - for (int jc = startidx; jc < endidx; ++jc) { - varout[IDX(jc, i)] = dp[IDX(jc, i)] - cp[IDX(jc, i)] * varout[IDX(jc, i + 1)]; - } - } - } - - printf("tdma_solver_vec: completed using C++\n"); - - // Wait 
for OpenACC asynchronous operations to complete if acc_queue is not optional - if (opt_acc_queue == -1) { - #pragma acc wait(acc_queue) - } - - // Free memory at the end - delete[] cp; - delete[] dp; - - // End timing - auto end_time = std::chrono::high_resolution_clock::now(); - std::chrono::duration<double> elapsed_time = end_time - start_time; - - std::cout << "Elapsed time for tdma_solver_vec (C++): " << elapsed_time.count() << " seconds" << std::endl; +void tdma_solver_vec_kokkos(const double* a, const double* b, const double* c, const double* d, + int slev, int elev, int startidx, int endidx, + int nrows, int ncols, double* varout) { + + // Start timing + auto start_time = std::chrono::high_resolution_clock::now(); + + // Allocate temporary arrays using Kokkos::View. + // The views c_p and d_p are allocated as 2D arrays with dimensions [nrows][ncols]. + // Kokkos::View automatically handles memory management. + Kokkos::View<double**> c_p("c_p", nrows, ncols); + Kokkos::View<double**> d_p("d_p", nrows, ncols); + + // Wrap the input arrays in unmanaged views. + // We assume that the input arrays are laid out in column-major order as in the original code. + // Here we use LayoutLeft so that the first index (row) is contiguous. 
+ typedef Kokkos::View<const double**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConst2D; + typedef Kokkos::View<double**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> Unmanaged2D; + UnmanagedConst2D a_view(a, nrows, ncols); + UnmanagedConst2D b_view(b, nrows, ncols); + UnmanagedConst2D c_view(c, nrows, ncols); + UnmanagedConst2D d_view(d, nrows, ncols); + Unmanaged2D varout_view(varout, nrows, ncols); + + // Initialize c-prime and d-prime at the starting level (slev) + Kokkos::parallel_for("init_c_p_d_p", Kokkos::RangePolicy<>(startidx, endidx), KOKKOS_LAMBDA (const int jc) { + c_p(jc, slev) = c_view(jc, slev) / b_view(jc, slev); + d_p(jc, slev) = d_view(jc, slev) / b_view(jc, slev); + }); + Kokkos::fence(); + + // Forward sweep: compute c-prime and d-prime for each column from slev+1 to elev-1. + for (int i = slev + 1; i < elev; ++i) { + Kokkos::parallel_for("forward_sweep", Kokkos::RangePolicy<>(startidx, endidx), KOKKOS_LAMBDA (const int jc) { + double m = 1.0 / (b_view(jc, i) - c_p(jc, i - 1) * a_view(jc, i)); + c_p(jc, i) = c_view(jc, i) * m; + d_p(jc, i) = (d_view(jc, i) - d_p(jc, i - 1) * a_view(jc, i)) * m; + }); + Kokkos::fence(); + } + + // Initialize the output array at the last level (elev-1) + Kokkos::parallel_for("init_varout", Kokkos::RangePolicy<>(startidx, endidx), KOKKOS_LAMBDA (const int jc) { + varout_view(jc, elev-1) = d_p(jc, elev-1); + }); + Kokkos::fence(); + + // Back substitution: update varout for columns from elev-2 down to slev. 
+ for (int i = elev - 2; i >= slev; --i) { + Kokkos::parallel_for("back_substitution", Kokkos::RangePolicy<>(startidx, endidx), KOKKOS_LAMBDA (const int jc) { + varout_view(jc, i) = d_p(jc, i) - c_p(jc, i) * varout_view(jc, i + 1); + }); + Kokkos::fence(); + } + + // End timing and print the elapsed time + auto end_time = std::chrono::high_resolution_clock::now(); + std::chrono::duration<double> elapsed_time = end_time - start_time; + std::cout << "Elapsed time for tdma_solver_vec (Kokkos): " << elapsed_time.count() << " seconds" << std::endl; } + } -- GitLab From 0405475748be645cb25de404c4acd96d9ea58ca1 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 21 Feb 2025 17:16:59 +0100 Subject: [PATCH 22/76] build kokkos internally along with the package --- CMakeLists.txt | 16 ++++++++++++++++ src/support/CMakeLists.txt | 10 ++++++---- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8fb4acf..ab93b92 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -110,6 +110,22 @@ else() endif() endif() +include(FetchContent) +# configure kokkos 4.4 repository link +FetchContent_Declare(kokkos + URL https://github.com/kokkos/kokkos/releases/download/4.4.01/kokkos-4.4.01.tar.gz + URL_HASH MD5=eafd0d42c9831858aa84fde78576644c +) + +# disable build of C++23 mdspan experimental support for now +set(Kokkos_ENABLE_IMPL_MDSPAN OFF CACHE BOOL "Experimental mdspan support") + +# by default, build the Kokkos serial backend for CPU +set(Kokkos_ENABLE_SERIAL ON CACHE BOOL "Kokkos Serial backend") +set(Kokkos_ARCH_NATIVE ON CACHE BOOL "Kokkos native architecture optimisations") + +FetchContent_MakeAvailable(kokkos) + add_subdirectory(src) # Allow for 'make test' even if the tests are disabled: diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index b4ceb37..9f56017 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -57,9 +57,6 @@ if(IM_ENABLE_OPENACC) endif() endif() 
-message(STATUS "iconmath-support enabling Kokkos") -find_package(Kokkos REQUIRED) - target_include_directories( iconmath-support PUBLIC @@ -77,7 +74,12 @@ target_include_directories( # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_BINARY_DIR}>>) -target_link_libraries(iconmath-support PUBLIC fortran-support::fortran-support Kokkos::kokkos) +target_link_libraries(iconmath-support + PUBLIC + fortran-support::fortran-support + PRIVATE + Kokkos::kokkos +) install(TARGETS iconmath-support EXPORT "${PROJECT_NAME}-targets") -- GitLab From 3b07f8ca617ce3bfbcdfefc052fff032aa496a10 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 21 Feb 2025 17:27:31 +0100 Subject: [PATCH 23/76] changed cmake format --- CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ab93b92..26137c4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,10 +112,10 @@ endif() include(FetchContent) # configure kokkos 4.4 repository link -FetchContent_Declare(kokkos - URL https://github.com/kokkos/kokkos/releases/download/4.4.01/kokkos-4.4.01.tar.gz - URL_HASH MD5=eafd0d42c9831858aa84fde78576644c -) +FetchContent_Declare( + kokkos + URL https://github.com/kokkos/kokkos/releases/download/4.4.01/kokkos-4.4.01.tar.gz + URL_HASH MD5=eafd0d42c9831858aa84fde78576644c) # disable build of C++23 mdspan experimental support for now set(Kokkos_ENABLE_IMPL_MDSPAN OFF CACHE BOOL "Experimental mdspan support") -- GitLab From 2b40e45e4e8ffe2e31ea13f1c65950debfdfcce0 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 21 Feb 2025 18:01:11 +0100 Subject: [PATCH 24/76] fixed an error in cmake styling --- .cmake-format.py | 1 + CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.cmake-format.py b/.cmake-format.py index 31f47fa..1b19e4e 100644 --- a/.cmake-format.py +++ 
b/.cmake-format.py @@ -37,3 +37,4 @@ with section("lint"): local_var_pattern = '[a-zA-Z][0-9a-zA-z_]+' private_var_pattern = '[a-z][a-z0-9_]+' public_var_pattern = '[A-Z][0-9a-zA-Z_]+' + global_var_pattern = '[A-Z][0-9a-zA-Z_]+' diff --git a/CMakeLists.txt b/CMakeLists.txt index 26137c4..7cf92be 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -117,7 +117,7 @@ FetchContent_Declare( URL https://github.com/kokkos/kokkos/releases/download/4.4.01/kokkos-4.4.01.tar.gz URL_HASH MD5=eafd0d42c9831858aa84fde78576644c) -# disable build of C++23 mdspan experimental support for now +# disable build of C++23 mdspan experimental support for now set(Kokkos_ENABLE_IMPL_MDSPAN OFF CACHE BOOL "Experimental mdspan support") # by default, build the Kokkos serial backend for CPU -- GitLab From 2de490de6d73db75b266f4744264c9a6d2767a8a Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 21 Feb 2025 18:04:04 +0100 Subject: [PATCH 25/76] updated the gitignore file --- .gitignore | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.gitignore b/.gitignore index 23535bf..8123bd2 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,12 @@ cmake_install.cmake iconmath-config-version.cmake iconmath-config.cmake iconmath-targets.cmake +KokkosConfig.cmake +KokkosConfigVersion.cmake +KokkosTargets.cmake +KokkosConfigCommon.cmake +Kokkos_Version_Info.cpp +Kokkos_Version_Info.hpp # Build stage files: *.L -- GitLab From 2f5735b579865f9b5ee5da0f59d8384a5cfadc82 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Sun, 23 Feb 2025 14:25:53 +0100 Subject: [PATCH 26/76] made the cpp function a templated one --- src/support/mo_math_utilities.F90 | 10 ++++-- src/support/mo_math_utilities.cpp | 31 +++++++++++-------- src/support/mo_math_utilities.hpp | 15 +++++++++ ..._utilities.F90 => test_math_utilities.f90} | 0 4 files changed, 40 insertions(+), 16 deletions(-) create mode 100644 src/support/mo_math_utilities.hpp rename test/fortran/{test_math_utilities.F90 
=> test_math_utilities.f90} (100%) diff --git a/src/support/mo_math_utilities.F90 b/src/support/mo_math_utilities.F90 index 459ff76..bb8e6df 100644 --- a/src/support/mo_math_utilities.F90 +++ b/src/support/mo_math_utilities.F90 @@ -79,7 +79,11 @@ MODULE mo_math_utilities PUBLIC :: line_intersect PUBLIC :: lintersect PUBLIC :: tdma_solver +#ifndef __USE_CPP_BINDINGS PUBLIC :: tdma_solver_vec +#else + PUBLIC :: tdma_solver_vec_double +#endif PUBLIC :: check_orientation ! vertical coordinates routines @@ -247,14 +251,14 @@ CONTAINS ! C++ binding for tdma_solver_vec INTERFACE - SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) & - BIND(C, NAME="tdma_solver_vec") + SUBROUTINE tdma_solver_vec_double(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) & + BIND(C, NAME="tdma_solver_vec_double") IMPORT :: c_double, c_int REAL(c_double), INTENT(IN) :: a(*), b(*), c(*), d(*) INTEGER(c_int), VALUE :: slev, elev, startidx, endidx, nrows, ncols REAL(c_double), INTENT(OUT) :: varout(*) INTEGER(c_int), OPTIONAL :: opt_acc_queue - END SUBROUTINE tdma_solver_vec + END SUBROUTINE tdma_solver_vec_double END INTERFACE CONTAINS diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp index b3031a5..6a60f2c 100644 --- a/src/support/mo_math_utilities.cpp +++ b/src/support/mo_math_utilities.cpp @@ -9,16 +9,12 @@ // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- -#include <vector> -#include <iostream> -#include <chrono> // For timing -#include <Kokkos_Core.hpp> +#include "mo_math_utilities.hpp" -extern "C" { - -void tdma_solver_vec_kokkos(const double* a, const double* b, const double* c, const double* d, +template <typename T> +void tdma_solver_vec(const T* a, const T* b, const T* c, const T* d, int slev, int elev, int startidx, int endidx, - int nrows, int ncols, double* varout) { + int nrows, int ncols, T* varout) { // 
Start timing auto start_time = std::chrono::high_resolution_clock::now(); @@ -26,14 +22,14 @@ void tdma_solver_vec_kokkos(const double* a, const double* b, const double* c, c // Allocate temporary arrays using Kokkos::View. // The views c_p and d_p are allocated as 2D arrays with dimensions [nrows][ncols]. // Kokkos::View automatically handles memory management. - Kokkos::View<double**> c_p("c_p", nrows, ncols); - Kokkos::View<double**> d_p("d_p", nrows, ncols); + Kokkos::View<T**> c_p("c_p", nrows, ncols); + Kokkos::View<T**> d_p("d_p", nrows, ncols); // Wrap the input arrays in unmanaged views. // We assume that the input arrays are laid out in column-major order as in the original code. // Here we use LayoutLeft so that the first index (row) is contiguous. - typedef Kokkos::View<const double**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConst2D; - typedef Kokkos::View<double**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> Unmanaged2D; + typedef Kokkos::View<const T**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConst2D; + typedef Kokkos::View<T**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> Unmanaged2D; UnmanagedConst2D a_view(a, nrows, ncols); UnmanagedConst2D b_view(b, nrows, ncols); UnmanagedConst2D c_view(c, nrows, ncols); @@ -50,7 +46,7 @@ void tdma_solver_vec_kokkos(const double* a, const double* b, const double* c, c // Forward sweep: compute c-prime and d-prime for each column from slev+1 to elev-1. 
for (int i = slev + 1; i < elev; ++i) { Kokkos::parallel_for("forward_sweep", Kokkos::RangePolicy<>(startidx, endidx), KOKKOS_LAMBDA (const int jc) { - double m = 1.0 / (b_view(jc, i) - c_p(jc, i - 1) * a_view(jc, i)); + T m = 1.0 / (b_view(jc, i) - c_p(jc, i - 1) * a_view(jc, i)); c_p(jc, i) = c_view(jc, i) * m; d_p(jc, i) = (d_view(jc, i) - d_p(jc, i - 1) * a_view(jc, i)) * m; }); @@ -71,10 +67,19 @@ void tdma_solver_vec_kokkos(const double* a, const double* b, const double* c, c Kokkos::fence(); } + c_p = Kokkos::View<T**>(); + d_p = Kokkos::View<T**>(); // End timing and print the elapsed time auto end_time = std::chrono::high_resolution_clock::now(); std::chrono::duration<double> elapsed_time = end_time - start_time; std::cout << "Elapsed time for tdma_solver_vec (Kokkos): " << elapsed_time.count() << " seconds" << std::endl; } +extern "C" { + + void tdma_solver_vec_double(const double* a, const double* b, const double* c, const double* d, + int slev, int elev, int startidx, int endidx, + int nrows, int ncols, double* varout) { + tdma_solver_vec<double>(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout); + } } diff --git a/src/support/mo_math_utilities.hpp b/src/support/mo_math_utilities.hpp new file mode 100644 index 0000000..20b1f44 --- /dev/null +++ b/src/support/mo_math_utilities.hpp @@ -0,0 +1,15 @@ +#include <vector> +#include <iostream> +#include <chrono> // For timing +#include <Kokkos_Core.hpp> + +template <typename T> +void tdma_solver_vec(const T* a, const T* b, const T* c, const T* d, + int slev, int elev, int startidx, int endidx, + int nrows, int ncols, T* varout); + +extern "C" { + void tdma_solver_vec_double(const double* a, const double* b, const double* c, const double* d, + int slev, int elev, int startidx, int endidx, + int nrows, int ncols, double* varout); +} diff --git a/test/fortran/test_math_utilities.F90 b/test/fortran/test_math_utilities.f90 similarity index 100% rename from test/fortran/test_math_utilities.F90 rename to 
test/fortran/test_math_utilities.f90 -- GitLab From 14acf1aa28218d7754b8cfbac532653fd30a126b Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Sun, 23 Feb 2025 14:47:17 +0100 Subject: [PATCH 27/76] added unit-tests for c++ codes using googletest --- test/CMakeLists.txt | 3 +- test/c/CMakeLists.txt | 30 +++++++++++++++ test/c/main.cpp | 14 +++++++ test/c/test_tdma_solver.cpp | 77 +++++++++++++++++++++++++++++++++++++ 4 files changed, 123 insertions(+), 1 deletion(-) create mode 100644 test/c/CMakeLists.txt create mode 100644 test/c/main.cpp create mode 100644 test/c/test_tdma_solver.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c8fa8e2..2a5f5df 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -9,4 +9,5 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------- -add_subdirectory(fortran) +# add_subdirectory(fortran) +add_subdirectory(c) diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt new file mode 100644 index 0000000..be1af9e --- /dev/null +++ b/test/c/CMakeLists.txt @@ -0,0 +1,30 @@ +# Fetch GoogleTest via FetchContent +include(FetchContent) +FetchContent_Declare( + googletest + URL https://github.com/google/googletest/archive/refs/tags/release-1.12.1.zip +) +set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) +FetchContent_MakeAvailable(googletest) + +# Find Kokkos (or use your existing Kokkos installation) +# find_package(Kokkos REQUIRED) + +set(SOURCES + main.cpp + test_tdma_solver.cpp +) +# Create the test executable from your test files, including main.cpp. +add_executable(iconmath_test_c ${SOURCES}) + +# Link the test executable with GoogleTest and Kokkos. 
+target_link_libraries(iconmath_test_c
+  PUBLIC
+    iconmath-support
+  PRIVATE
+    gtest_main
+    Kokkos::kokkos
+)
+
+include(GoogleTest)
+gtest_discover_tests(iconmath_test_c)
diff --git a/test/c/main.cpp b/test/c/main.cpp
new file mode 100644
index 0000000..2df720d
--- /dev/null
+++ b/test/c/main.cpp
@@ -0,0 +1,14 @@
+#include <Kokkos_Core.hpp>
+#include <gtest/gtest.h>
+
+int main(int argc, char** argv) {
+    // Initialize Kokkos once for the whole test binary, before GoogleTest parses its own arguments and runs the tests.
+    Kokkos::initialize(argc, argv);
+
+    ::testing::InitGoogleTest(&argc, argv);
+    int result = RUN_ALL_TESTS();
+
+    // Finalize Kokkos after all tests have completed; the GoogleTest result becomes the process exit code.
+    Kokkos::finalize();
+    return result;
+}
diff --git a/test/c/test_tdma_solver.cpp b/test/c/test_tdma_solver.cpp
new file mode 100644
index 0000000..7c3c3a8
--- /dev/null
+++ b/test/c/test_tdma_solver.cpp
@@ -0,0 +1,77 @@
+#include <gtest/gtest.h>
+#include <vector>
+#include <algorithm>
+#include "mo_math_utilities.hpp"
+
+// Flat 0-based index of element (i, j) in a column-major array with nrows rows (i is the fastest-varying index).
+inline int idx(int i, int j, int nrows) {
+    return i + j * nrows;
+}
+
+// Test fixture for the TDMA solver tests: owns four n x n input arrays and one n x n output array.
+class TDMASolverTestFixture : public ::testing::Test {
+protected:
+    const int n = 10; // Number of rows and of columns of every array.
+    std::vector<double> a; // Solver input a (n * n values, column-major).
+    std::vector<double> b; // Solver input b (n * n values, column-major).
+    std::vector<double> c; // Solver input c (n * n values, column-major).
+    std::vector<double> d; // Solver input d (n * n values, column-major).
+    std::vector<double> x; // Solver output (n * n values, column-major).
+
+    TDMASolverTestFixture()
+        : a(n * n), b(n * n), c(n * n), d(n * n), x(n * n, 0.0) {}
+
+    // SetUp is run before each test, so every test starts from the same inputs and a zeroed output.
+    void SetUp() override {
+        // Fill the inputs element by element: value = (i + 1) + (j + 1); b gets twice that value, a, c and d get it unchanged.
+        for (int j = 0; j < n; j++) {
+            for (int i = 0; i < n; i++) {
+                double value = (i + 1) + (j + 1);
+                a[idx(i, j, n)] = 1.0 * value;
+                b[idx(i, j, n)] = 2.0 * value;
+                c[idx(i, j, n)] = 1.0 * value;
+                d[idx(i, j, n)] = 1.0 * value;
+            }
+        }
+        // Clear the output vector.
+        std::fill(x.begin(), x.end(), 0.0);
+    }
+};
+
+TEST_F(TDMASolverTestFixture, FullTest) {
+    // Call the solver over the full range: slev = 0, elev = n, startidx = 0, endidx = n, nrows = ncols = n.
+    tdma_solver_vec_double(a.data(), b.data(), c.data(), d.data(),
+                           0, n, 0, n, n, n, x.data());
+
+    // Compute the sum of all elements in the output matrix.
+    double sum = 0.0;
+    for (int j = 0; j < n; j++) {
+        for (int i = 0; i < n; i++) {
+            sum += x[idx(i, j, n)];
+        }
+    }
+
+    // Expected reference sum (regression value; its origin is not shown here - presumably the Fortran implementation, TODO confirm).
+    double sum_ref = 27.2727272727272769;
+    double tol = 1e-13;
+    EXPECT_NEAR(sum, sum_ref, tol);
+}
+
+TEST_F(TDMASolverTestFixture, PartialTest) {
+    // Call the solver for a partial region:
+    // For C++: slev = 1, elev = n-1, startidx = 1, endidx = n-1 (0-based; the summation below treats both upper bounds as exclusive).
+    tdma_solver_vec_double(a.data(), b.data(), c.data(), d.data(),
+                           1, n - 1, 1, n - 1, n, n, x.data());
+
+    // Compute the sum over the same sub-region that was passed to the solver (indices 1 .. n-2 in both directions).
+    double sum = 0.0;
+    for (int j = 1; j < n - 1; j++) {
+        for (int i = 1; i < n - 1; i++) {
+            sum += x[idx(i, j, n)];
+        }
+    }
+
+    double sum_ref = 17.7777777777777679;
+    double tol = 1e-13;
+    EXPECT_NEAR(sum, sum_ref, tol);
+}
-- 
GitLab

From a8ef83a350adb9b3638ca4fbc5fb564e6f228a2b Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Sun, 23 Feb 2025 21:53:41 +0100
Subject: [PATCH 28/76] added c++ code for mo_lib_interpolation_vector

---
 src/interpolation/CMakeLists.txt | 11 ++
 .../mo_lib_interpolation_vector.cpp | 130 ++++++++++++++++++
 .../mo_lib_interpolation_vector.hpp | 38 +++++
 src/support/mo_lib_loopindices.hpp | 11 ++
 4 files changed, 190 insertions(+)
 create mode 100644 src/interpolation/mo_lib_interpolation_vector.cpp
 create mode 100644 src/interpolation/mo_lib_interpolation_vector.hpp
 create mode 100644 src/support/mo_lib_loopindices.hpp

diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt
index 73e582c..9455f9e 100644
--- a/src/interpolation/CMakeLists.txt
+++ b/src/interpolation/CMakeLists.txt
@@ -13,6 +13,7 @@ add_library(
   iconmath-interpolation
   mo_lib_interpolation_scalar.F90
mo_lib_interpolation_vector.F90 + mo_lib_interpolation_vector.cpp mo_lib_intp_rbf.F90) add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation) @@ -55,10 +56,20 @@ target_include_directories( $<INSTALL_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:$<INSTALL_PREFIX>/${CMAKE_INSTALL_INCLUDEDIR}>> # Path to internal include directory $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:${PROJECT_SOURCE_DIR}/include>> + # Path to the internal C/C++ headers (for testing): Requires CMake 3.15+ for + # multiple compile languages + # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html + $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_SOURCE_DIR}>> + PRIVATE + # Path to config.h (for C and C++ only): Requires CMake 3.15+ for multiple + # compile languages + # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html + $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_BINARY_DIR}>> ) target_link_libraries(iconmath-interpolation PUBLIC fortran-support::fortran-support) target_link_libraries(iconmath-interpolation PUBLIC iconmath-support) +target_link_libraries(iconmath-interpolation PRIVATE Kokkos::kokkos) install(TARGETS iconmath-interpolation EXPORT "${PROJECT_NAME}-targets") diff --git a/src/interpolation/mo_lib_interpolation_vector.cpp b/src/interpolation/mo_lib_interpolation_vector.cpp new file mode 100644 index 0000000..50772e7 --- /dev/null +++ b/src/interpolation/mo_lib_interpolation_vector.cpp @@ -0,0 +1,130 @@ +#include "mo_lib_loopindices.hpp" +#include "mo_lib_interpolation_vector.hpp" + +// The templated C++ function using Kokkos. +// Raw pointer arguments are wrapped into unmanaged Kokkos::Views. +// Note: The dimensions below must match the Fortran arrays. 
+// - p_vn_in and p_vt_in: dimensions [nproma, nlev, nblks_e]
+// - cell_edge_idx and cell_edge_blk: dimensions [nproma, nblks_c, 3]
+// - e_bln_c_u and e_bln_c_v: dimensions [nproma, 6, nblks_c]
+// - p_u_out and p_v_out: dimensions [nproma, nlev, nblks_c]
+template <typename T>
+void edges2cells_vector_lib(
+    const T* p_vn_in, const T* p_vt_in,
+    const int* cell_edge_idx, const int* cell_edge_blk,
+    const T* e_bln_c_u, const T* e_bln_c_v,
+    T* p_u_out, T* p_v_out,
+    // Block range, index range within a block, level range (slev..elev, inclusive) and block length nproma.
+    int i_startblk, int i_endblk,
+    int i_startidx_in, int i_endidx_in,
+    int slev, int elev,
+    int nproma,
+    // Dimensions for the arrays.
+    int nlev, int nblks_e, int nblks_c)
+{
+    // Wrap the raw pointers in unmanaged Kokkos Views (LayoutLeft, i.e. the first index is contiguous); the data is used in place.
+    typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
+    typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
+    typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
+
+    UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e);
+    UnmanagedConstT3D p_vt_in_view(p_vt_in, nproma, nlev, nblks_e);
+
+    UnmanagedConstInt3D cell_edge_idx_view(cell_edge_idx, nproma, nblks_c, 3);
+    UnmanagedConstInt3D cell_edge_blk_view(cell_edge_blk, nproma, nblks_c, 3);
+
+    UnmanagedConstT3D e_bln_c_u_view(e_bln_c_u, nproma, 6, nblks_c);
+    UnmanagedConstT3D e_bln_c_v_view(e_bln_c_v, nproma, 6, nblks_c);
+
+    UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblks_c);
+    UnmanagedT3D p_v_out_view(p_v_out, nproma, nlev, nblks_c);
+
+    // Loop over the cell blocks i_startblk..i_endblk (inclusive); jb is used directly as the block index of the views.
+    for (int jb = i_startblk; jb <= i_endblk; ++jb) {
+        // Inner index range for block jb. NOTE(review): the bounds are used below as 0-based, inclusive jc limits, while the Fortran unit test of get_indices_c_lib expects MAX(1, start) and nproma for the first block - confirm the index base (jc == nproma would exceed the first view extent and jc == 0 would be skipped).
+        int i_startidx, i_endidx;
+        get_indices_c_lib(i_startidx_in, i_endidx_in, nproma,
+                          jb, i_startblk, i_endblk,
+                          i_startidx, i_endidx);
+
+        Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
+            {slev, i_startidx}, {elev + 1, i_endidx + 1});
+        Kokkos::parallel_for("edges2cells_inner", innerPolicy,
+            KOKKOS_LAMBDA(const int jk, const int jc) {
+                // Each output is a sum of six products: coefficient k = 0..5 (second index of e_bln_c_u / e_bln_c_v) times p_vn_in (k even) or p_vt_in (k odd) at edge k/2 of cell (jc, jb); the stored edge index/block values are reduced by 1 before use.
+                p_u_out_view(jc, jk, jb) =
+                    e_bln_c_u_view(jc, 0, jb) *
+                        p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) +
+                    e_bln_c_u_view(jc, 1, jb) *
+                        p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) +
+                    e_bln_c_u_view(jc, 2, jb) *
+                        p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) +
+                    e_bln_c_u_view(jc, 3, jb) *
+                        p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) +
+                    e_bln_c_u_view(jc, 4, jb) *
+                        p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1) +
+                    e_bln_c_u_view(jc, 5, jb) *
+                        p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1);
+
+                p_v_out_view(jc, jk, jb) =
+                    e_bln_c_v_view(jc, 0, jb) *
+                        p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) +
+                    e_bln_c_v_view(jc, 1, jb) *
+                        p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) +
+                    e_bln_c_v_view(jc, 2, jb) *
+                        p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) +
+                    e_bln_c_v_view(jc, 3, jb) *
+                        p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) +
+                    e_bln_c_v_view(jc, 4, jb) *
+                        p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1) +
+                    e_bln_c_v_view(jc, 5, jb) *
+                        p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1);
+            });
+        // Wait for this block's kernel to finish before the next block is launched (the fence is unconditional).
+        Kokkos::fence();
+    }
+}
+
+extern "C" void edges2cells_vector_lib_dp(
+    const double* p_vn_in, const double* p_vt_in,
+    const int* cell_edge_idx, const int* cell_edge_blk,
+    const double* e_bln_c_u, const double* e_bln_c_v,
+    double* p_u_out, double* p_v_out,
+    int i_startblk, int i_endblk,
+    int i_startidx_in, int i_endidx_in,
+    int slev, int elev,
+    int nproma,
+    int nlev, int nblks_e, int nblks_c)
+{
+    edges2cells_vector_lib<double>(p_vn_in, p_vt_in,
+                                   cell_edge_idx, cell_edge_blk,
+                                   e_bln_c_u, e_bln_c_v,
+                                   p_u_out, p_v_out,
+                                   i_startblk, i_endblk,
+                                   i_startidx_in, i_endidx_in,
+                                   slev, elev,
+                                   nproma,
+                                   nlev, nblks_e, nblks_c);
+}
+
+extern "C" void edges2cells_vector_lib_sp(
+    const float* p_vn_in, const float* p_vt_in,
+    const int* cell_edge_idx, const int* cell_edge_blk,
+    const float* e_bln_c_u, const float* e_bln_c_v,
+    float* p_u_out, float* p_v_out,
+    int i_startblk, int i_endblk,
+    int i_startidx_in, int i_endidx_in,
+    int slev, int elev,
+    int nproma,
+    int nlev, int nblks_e, int nblks_c)
+{
+    edges2cells_vector_lib<float>(p_vn_in, p_vt_in,
+                                  cell_edge_idx, cell_edge_blk,
+                                  e_bln_c_u, e_bln_c_v,
+                                  p_u_out, p_v_out,
+                                  i_startblk, i_endblk,
+                                  i_startidx_in, i_endidx_in,
+                                  slev, elev,
+                                  nproma,
+                                  nlev, nblks_e, nblks_c);
+}
diff --git a/src/interpolation/mo_lib_interpolation_vector.hpp b/src/interpolation/mo_lib_interpolation_vector.hpp
new file mode 100644
index 0000000..a764ada
--- /dev/null
+++ b/src/interpolation/mo_lib_interpolation_vector.hpp
@@ -0,0 +1,38 @@
+#include <Kokkos_Core.hpp>
+#include <vector>
+
+template <typename T>
+void edges2cells_vector_lib(
+    const T* p_vn_in, const T* p_vt_in,
+    const int* cell_edge_idx, const int* cell_edge_blk,
+    const T* e_bln_c_u, const T* e_bln_c_v,
+    T* p_u_out, T* p_v_out,
+    // Block range, index range within a block, level range (slev..elev, inclusive) and block length nproma.
+    int i_startblk, int i_endblk,
+    int i_startidx_in, int i_endidx_in,
+    int slev, int elev,
+    int nproma,
+    // Dimensions for the arrays.
+ int nlev, int nblks_e, int nblks_c); + +extern "C" void edges2cells_vector_lib_dp( + const double* p_vn_in, const double* p_vt_in, + const int* cell_edge_idx, const int* cell_edge_blk, + const double* e_bln_c_u, const double* e_bln_c_v, + double* p_u_out, double* p_v_out, + int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, + int slev, int elev, + int nproma, + int nlev, int nblks_e, int nblks_c); + +extern "C" void edges2cells_vector_lib_sp( + const float* p_vn_in, const float* p_vt_in, + const int* cell_edge_idx, const int* cell_edge_blk, + const float* e_bln_c_u, const float* e_bln_c_v, + float* p_u_out, float* p_v_out, + int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, + int slev, int elev, + int nproma, + int nlev, int nblks_e, int nblks_c); diff --git a/src/support/mo_lib_loopindices.hpp b/src/support/mo_lib_loopindices.hpp new file mode 100644 index 0000000..d53aa38 --- /dev/null +++ b/src/support/mo_lib_loopindices.hpp @@ -0,0 +1,11 @@ +extern "C" { + // get_indices_c_lib function + void get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out); + + void get_indices_e_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out); + + void get_indices_v_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out); +} -- GitLab From ef511855f81132db90c962116e499df5a347d254 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 24 Feb 2025 07:20:00 +0100 Subject: [PATCH 29/76] added license to the new files --- src/interpolation/mo_lib_interpolation_vector.cpp | 11 +++++++++++ src/interpolation/mo_lib_interpolation_vector.hpp | 11 +++++++++++ src/support/mo_lib_loopindices.hpp | 11 +++++++++++ src/support/mo_math_utilities.hpp | 11 +++++++++++ test/c/CMakeLists.txt | 
13 +++++++++++++ test/c/main.cpp | 11 +++++++++++ test/c/test_tdma_solver.cpp | 11 +++++++++++ 7 files changed, 79 insertions(+) diff --git a/src/interpolation/mo_lib_interpolation_vector.cpp b/src/interpolation/mo_lib_interpolation_vector.cpp index 50772e7..00a914a 100644 --- a/src/interpolation/mo_lib_interpolation_vector.cpp +++ b/src/interpolation/mo_lib_interpolation_vector.cpp @@ -1,3 +1,14 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + #include "mo_lib_loopindices.hpp" #include "mo_lib_interpolation_vector.hpp" diff --git a/src/interpolation/mo_lib_interpolation_vector.hpp b/src/interpolation/mo_lib_interpolation_vector.hpp index a764ada..0d19b24 100644 --- a/src/interpolation/mo_lib_interpolation_vector.hpp +++ b/src/interpolation/mo_lib_interpolation_vector.hpp @@ -1,3 +1,14 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + #include <Kokkos_Core.hpp> #include <vector> diff --git a/src/support/mo_lib_loopindices.hpp b/src/support/mo_lib_loopindices.hpp index d53aa38..03eb977 100644 --- a/src/support/mo_lib_loopindices.hpp +++ b/src/support/mo_lib_loopindices.hpp @@ -1,3 +1,14 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See 
AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + extern "C" { // get_indices_c_lib function void get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, diff --git a/src/support/mo_math_utilities.hpp b/src/support/mo_math_utilities.hpp index 20b1f44..4ee5dc9 100644 --- a/src/support/mo_math_utilities.hpp +++ b/src/support/mo_math_utilities.hpp @@ -1,3 +1,14 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + #include <vector> #include <iostream> #include <chrono> // For timing diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index be1af9e..52225a7 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -1,3 +1,14 @@ +# ICON +# +# --------------------------------------------------------------- +# Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +# Contact information: icon-model.org +# +# See AUTHORS.TXT for a list of authors +# See LICENSES/ for license information +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------- + # Fetch GoogleTest via FetchContent include(FetchContent) FetchContent_Declare( @@ -13,6 +24,7 @@ FetchContent_MakeAvailable(googletest) set(SOURCES main.cpp test_tdma_solver.cpp + test_interpolation_vector.cpp ) # Create the test executable from your test files, including main.cpp. 
add_executable(iconmath_test_c ${SOURCES}) @@ -21,6 +33,7 @@ add_executable(iconmath_test_c ${SOURCES}) target_link_libraries(iconmath_test_c PUBLIC iconmath-support + iconmath-interpolation PRIVATE gtest_main Kokkos::kokkos diff --git a/test/c/main.cpp b/test/c/main.cpp index 2df720d..bd0fadc 100644 --- a/test/c/main.cpp +++ b/test/c/main.cpp @@ -1,3 +1,14 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + #include <Kokkos_Core.hpp> #include <gtest/gtest.h> diff --git a/test/c/test_tdma_solver.cpp b/test/c/test_tdma_solver.cpp index 7c3c3a8..8f120ef 100644 --- a/test/c/test_tdma_solver.cpp +++ b/test/c/test_tdma_solver.cpp @@ -1,3 +1,14 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + #include <gtest/gtest.h> #include <vector> #include <algorithm> -- GitLab From 2ecbb8b326d01350a076f523593c0722314436a2 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 24 Feb 2025 07:23:09 +0100 Subject: [PATCH 30/76] fixed a bug in cmake style --- test/c/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index 52225a7..95ca08b 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -15,7 +15,7 @@ FetchContent_Declare( googletest URL 
https://github.com/google/googletest/archive/refs/tags/release-1.12.1.zip ) -set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) +# set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) FetchContent_MakeAvailable(googletest) # Find Kokkos (or use your existing Kokkos installation) -- GitLab From ece90a5a8596df81592d309112d50cc4dd68c2ed Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 24 Feb 2025 07:25:39 +0100 Subject: [PATCH 31/76] added test for interpolation_vector --- test/c/test_interpolation_vector.cpp | 123 +++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 test/c/test_interpolation_vector.cpp diff --git a/test/c/test_interpolation_vector.cpp b/test/c/test_interpolation_vector.cpp new file mode 100644 index 0000000..dc70a63 --- /dev/null +++ b/test/c/test_interpolation_vector.cpp @@ -0,0 +1,123 @@ +#include <gtest/gtest.h> +#include <Kokkos_Core.hpp> +#include <vector> +#include "mo_lib_interpolation_vector.hpp" + +// Dimensions for the test (small, trivial test). +// We assume Fortran ordering: column-major, but our C wrappers will wrap raw pointers into Kokkos::Views with LayoutLeft. +constexpr int nproma = 2; +constexpr int nlev = 3; +constexpr int nblks_e = 2; // For the edge arrays (p_vn_in, p_vt_in) +constexpr int nblks_c = 2; // For the cell arrays and interpolation coefficients + +// For the get_indices_c_lib inputs. +constexpr int i_startblk = 0; +constexpr int i_endblk = 1; // two blocks: indices 0 and 1 +constexpr int i_startidx_in = 0; +constexpr int i_endidx_in = nproma - 1; // 0 and 1 +constexpr int slev = 0; +constexpr int elev = nlev - 1; // 0 .. 2 + +// Helper to compute total number of elements for a 3D array stored in column-major order. +template<typename T> +size_t num_elements(int dim1, int dim2, int dim3) { + return static_cast<size_t>(dim1) * dim2 * dim3; +} + +// Test for the double precision (dp) version. +TEST(Edges2CellsTest, DPTest) { + // Allocate and fill input arrays. 
+    std::vector<double> p_vn_in(num_elements<double>(nproma, nlev, nblks_e), 1.0);
+    std::vector<double> p_vt_in(num_elements<double>(nproma, nlev, nblks_e), 1.0);
+    // cell_edge_idx and cell_edge_blk: dimensions [nproma, nblks_c, 3]
+    std::vector<int> cell_edge_idx(num_elements<int>(nproma, nblks_c, 3), 1);
+    std::vector<int> cell_edge_blk(num_elements<int>(nproma, nblks_c, 3), 1);
+
+    // Fill cell_edge_idx with the repeating pattern 1, 2, 1 in flat memory order (all values stay within 1..nproma). With the [nproma, nblks_c, 3] layout the edge dimension varies slowest, so three consecutive entries are not the three edges of one cell.
+    for (int i = 0; i < num_elements<int>(nproma, nblks_c, 3); i += 3) {
+        cell_edge_idx[i] = 1;
+        cell_edge_idx[i+1] = 2;
+        cell_edge_idx[i+2] = 1;
+    }
+    // cell_edge_blk keeps its initial value 1 everywhere (valid since nblks_e=2, so index 1 means block 0 after subtracting 1).
+    // e_bln_c_u and e_bln_c_v: dimensions [nproma, 6, nblks_c]
+    std::vector<double> e_bln_c_u(num_elements<double>(nproma, 6, nblks_c), 1.0);
+    std::vector<double> e_bln_c_v(num_elements<double>(nproma, 6, nblks_c), 1.0);
+    // Output arrays: dimensions [nproma, nlev, nblks_c]
+    std::vector<double> p_u_out(num_elements<double>(nproma, nlev, nblks_c), 0.0);
+    std::vector<double> p_v_out(num_elements<double>(nproma, nlev, nblks_c), 0.0);
+
+    std::vector<double> p_u_ref(num_elements<double>(nproma, nlev, nblks_c), 6.0);
+    std::vector<double> p_v_ref(num_elements<double>(nproma, nlev, nblks_c), 6.0);
+
+    // Call the dp (double precision) version.
+    edges2cells_vector_lib_dp(
+        p_vn_in.data(), p_vt_in.data(),
+        cell_edge_idx.data(), cell_edge_blk.data(),
+        e_bln_c_u.data(), e_bln_c_v.data(),
+        p_u_out.data(), p_v_out.data(),
+        i_startblk, i_endblk,
+        i_startidx_in, i_endidx_in,
+        slev, elev,
+        nproma,
+        nlev, nblks_e, nblks_c);
+
+    // Every entry written by the kernel must be 6, because each output is a sum of 6 terms of 1*1.
+    // Flat entries 0, 8 and 10 are expected to keep their initial 0. NOTE(review): presumably they are skipped because of how the get_indices_c_lib bounds are applied to jc - confirm this is intended.
+    p_u_ref[0] = 0.0;
+    p_u_ref[8] = 0.0;
+    p_u_ref[10] = 0.0;
+    p_v_ref[0] = 0.0;
+    p_v_ref[8] = 0.0;
+    p_v_ref[10] = 0.0;
+    for (size_t idx = 0; idx < p_u_out.size(); ++idx) {
+        EXPECT_NEAR(p_u_out[idx], p_u_ref[idx], 1e-12);
+        EXPECT_NEAR(p_v_out[idx], p_v_ref[idx], 1e-12);
+    }
+}
+
+// Test for the single precision (sp) version; same setup and expectations as the dp test, with float data.
+TEST(Edges2CellsTest, SPTest) {
+    // Allocate and fill input arrays.
+    std::vector<float> p_vn_in(num_elements<float>(nproma, nlev, nblks_e), 1.0f);
+    std::vector<float> p_vt_in(num_elements<float>(nproma, nlev, nblks_e), 1.0f);
+    std::vector<int> cell_edge_idx(num_elements<int>(nproma, nblks_c, 3), 1);
+    std::vector<int> cell_edge_blk(num_elements<int>(nproma, nblks_c, 3), 1);
+    // Fill cell_edge_idx with the repeating pattern 1, 2, 1 in flat memory order.
+    for (int i = 0; i < num_elements<int>(nproma, nblks_c, 3); i += 3) {
+        cell_edge_idx[i] = 1;
+        cell_edge_idx[i+1] = 2;
+        cell_edge_idx[i+2] = 1;
+    }
+    std::vector<float> e_bln_c_u(num_elements<float>(nproma, 6, nblks_c), 1.0f);
+    std::vector<float> e_bln_c_v(num_elements<float>(nproma, 6, nblks_c), 1.0f);
+    std::vector<float> p_u_out(num_elements<float>(nproma, nlev, nblks_c), 0.0f);
+    std::vector<float> p_v_out(num_elements<float>(nproma, nlev, nblks_c), 0.0f);
+
+    std::vector<float> p_u_ref(num_elements<float>(nproma, nlev, nblks_c), 6.0f);
+    std::vector<float> p_v_ref(num_elements<float>(nproma, nlev, nblks_c), 6.0f);
+
+    // Call the sp (float precision) version.
+    edges2cells_vector_lib_sp(
+        p_vn_in.data(), p_vt_in.data(),
+        cell_edge_idx.data(), cell_edge_blk.data(),
+        e_bln_c_u.data(), e_bln_c_v.data(),
+        p_u_out.data(), p_v_out.data(),
+        i_startblk, i_endblk,
+        i_startidx_in, i_endidx_in,
+        slev, elev,
+        nproma,
+        nlev, nblks_e, nblks_c);
+
+    p_u_ref[0] = 0.0f;
+    p_u_ref[8] = 0.0f;
+    p_u_ref[10] = 0.0f;
+    p_v_ref[0] = 0.0f;
+    p_v_ref[8] = 0.0f;
+    p_v_ref[10] = 0.0f;
+    // Compare against the reference: 6 everywhere except the zeroed flat entries 0, 8 and 10.
+ for (size_t idx = 0; idx < p_u_out.size(); ++idx) { + EXPECT_NEAR(p_u_out[idx], p_u_ref[idx], 1e-5f); + EXPECT_NEAR(p_v_out[idx], p_v_ref[idx], 1e-5f); + } +} -- GitLab From 089863dace1a3bc676869defec19ec350c9ab0cf Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 24 Feb 2025 07:27:21 +0100 Subject: [PATCH 32/76] added licence to the new test file --- test/c/test_interpolation_vector.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/c/test_interpolation_vector.cpp b/test/c/test_interpolation_vector.cpp index dc70a63..a33673c 100644 --- a/test/c/test_interpolation_vector.cpp +++ b/test/c/test_interpolation_vector.cpp @@ -1,3 +1,14 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + #include <gtest/gtest.h> #include <Kokkos_Core.hpp> #include <vector> -- GitLab From be8fea690c3f9ab9ab40759a7542fa115b9d0422 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 24 Feb 2025 09:25:16 +0100 Subject: [PATCH 33/76] modified CMakeLists.txt of horizontal to enable compilation of C++ code that uses Kokkos --- src/horizontal/CMakeLists.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt index 7515842..6cebed9 100644 --- a/src/horizontal/CMakeLists.txt +++ b/src/horizontal/CMakeLists.txt @@ -52,11 +52,21 @@ target_include_directories( # Path to the Fortran modules: $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:${Fortran_MODULE_DIRECTORY}>> $<INSTALL_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:$<INSTALL_PREFIX>/${CMAKE_INSTALL_INCLUDEDIR}>> + # Path to the internal C/C++ headers (for testing): 
Requires CMake 3.15+ for + # multiple compile languages + # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html + $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_SOURCE_DIR}>> + PRIVATE + # Path to config.h (for C and C++ only): Requires CMake 3.15+ for multiple + # compile languages + # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html + $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_BINARY_DIR}>> ) target_link_libraries(iconmath-horizontal PUBLIC fortran-support::fortran-support) target_link_libraries(iconmath-horizontal PUBLIC iconmath-support) target_link_libraries(iconmath-horizontal PUBLIC iconmath-interpolation) +target_link_libraries(iconmath-interpolation PRIVATE Kokkos::kokkos) install(TARGETS iconmath-horizontal EXPORT "${PROJECT_NAME}-targets") -- GitLab From 270cb827741578439a02cb941d6dd5a11473a577 Mon Sep 17 00:00:00 2001 From: Georgiana Mania <mania@dkrz.de> Date: Mon, 24 Feb 2025 12:32:00 +0000 Subject: [PATCH 34/76] configure compilation for kokkos + nvidia (icon-libraries/libiconmath!31) Merged-by: Pradipta Samanta <samanta@dkrz.de> Changelog: default --- CMakeLists.txt | 11 +++++++++++ src/horizontal/CMakeLists.txt | 3 ++- src/interpolation/CMakeLists.txt | 1 + src/support/CMakeLists.txt | 1 + 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7cf92be..affedaa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,6 +29,7 @@ option(IM_USE_CPP_BINDINGS "Use C++ bindings" OFF) option(IM_ENABLE_DIM_SWAP "Enable dimension swap" OFF) option(IM_ENABLE_OPENACC "Enable OpenACC support" OFF) option(IM_ENABLE_OPENMP "Enable OpenMP support" OFF) +set(IM_ENABLE_GPU OFF CACHE STRING "Enable Kokkos GPU support for arch. Valid values: OFF, nvidia-sm80") # GNUInstallDirs issues a warning if CMAKE_SIZEOF_VOID_P is not defined, which # is the case with NAG. 
One way to circumvent that is to enable C language for @@ -124,6 +125,16 @@ set(Kokkos_ENABLE_IMPL_MDSPAN OFF CACHE BOOL "Experimental mdspan support") set(Kokkos_ENABLE_SERIAL ON CACHE BOOL "Kokkos Serial backend") set(Kokkos_ARCH_NATIVE ON CACHE BOOL "Kokkos native architecture optimisations") +if ("${IM_ENABLE_GPU}" STREQUAL "nvidia-sm80") + # NVIDIA A100 + set(Kokkos_ENABLE_CUDA ON CACHE BOOL "Kokkos CUDA backend") + set(Kokkos_ARCH_AMPERE80 ON CACHE BOOL "CUDA architecture: Ampere cc80") +endif() + +if (${IM_ENABLE_OPENMP}) + set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "Kokkos OpenMP backend") +endif() + FetchContent_MakeAvailable(kokkos) add_subdirectory(src) diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt index 6cebed9..d2abe7d 100644 --- a/src/horizontal/CMakeLists.txt +++ b/src/horizontal/CMakeLists.txt @@ -66,7 +66,8 @@ target_include_directories( target_link_libraries(iconmath-horizontal PUBLIC fortran-support::fortran-support) target_link_libraries(iconmath-horizontal PUBLIC iconmath-support) target_link_libraries(iconmath-horizontal PUBLIC iconmath-interpolation) -target_link_libraries(iconmath-interpolation PRIVATE Kokkos::kokkos) +target_link_libraries(iconmath-horizontal PRIVATE Kokkos::kokkos) +set_target_properties(iconmath-horizontal PROPERTIES LINKER_LANGUAGE Fortran) install(TARGETS iconmath-horizontal EXPORT "${PROJECT_NAME}-targets") diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index 9455f9e..346aaaa 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -70,6 +70,7 @@ target_include_directories( target_link_libraries(iconmath-interpolation PUBLIC fortran-support::fortran-support) target_link_libraries(iconmath-interpolation PUBLIC iconmath-support) target_link_libraries(iconmath-interpolation PRIVATE Kokkos::kokkos) +set_target_properties(iconmath-interpolation PROPERTIES LINKER_LANGUAGE Fortran) install(TARGETS iconmath-interpolation EXPORT 
"${PROJECT_NAME}-targets") diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index 9f56017..ed6a4d3 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -80,6 +80,7 @@ target_link_libraries(iconmath-support PRIVATE Kokkos::kokkos ) +set_target_properties(iconmath-support PROPERTIES LINKER_LANGUAGE Fortran) install(TARGETS iconmath-support EXPORT "${PROJECT_NAME}-targets") -- GitLab From 417e6cdb7cb875c4c884941353f153d5c988f813 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 24 Feb 2025 21:57:30 +0100 Subject: [PATCH 35/76] added openacc_fortran_options only for the compilation of Fortran codes fixed a cmake style added to two other components --- src/horizontal/CMakeLists.txt | 4 +++- src/interpolation/CMakeLists.txt | 3 ++- src/support/CMakeLists.txt | 4 +++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt index d2abe7d..078a14d 100644 --- a/src/horizontal/CMakeLists.txt +++ b/src/horizontal/CMakeLists.txt @@ -39,7 +39,9 @@ if(IM_ENABLE_OPENACC) # If _OPENACC is defined, assume that the required compiler flags are already # provided, e.g. in CMAKE_Fortran_FLAGS: if(NOT HAS_OPENACC_MACRO) - target_compile_options(iconmath-horizontal PRIVATE ${OpenACC_Fortran_OPTIONS}) + target_compile_options(iconmath-horizontal + PRIVATE + $<$<COMPILE_LANGUAGE:Fortran>:${OpenACC_Fortran_OPTIONS}>) # This make sures that unit tests (FortUTF) compiles without the need of # passing OpenACC compile option. target_link_libraries(iconmath-horizontal PRIVATE OpenACC::OpenACC_Fortran) diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index 346aaaa..f982f3b 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -41,7 +41,8 @@ if(IM_ENABLE_OPENACC) # provided, e.g. 
in CMAKE_Fortran_FLAGS: if(NOT HAS_OPENACC_MACRO) target_compile_options(iconmath-interpolation - PRIVATE ${OpenACC_Fortran_OPTIONS}) + PRIVATE + $<$<COMPILE_LANGUAGE:Fortran>:${OpenACC_Fortran_OPTIONS}>) # This make sures that unit tests (FortUTF) compiles without the need of # passing OpenACC compile option. target_link_libraries(iconmath-interpolation PRIVATE OpenACC::OpenACC_Fortran) diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index ed6a4d3..e78fc16 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -50,7 +50,9 @@ if(IM_ENABLE_OPENACC) # If _OPENACC is defined, assume that the required compiler flags are already # provided, e.g. in CMAKE_Fortran_FLAGS: if(NOT HAS_OPENACC_MACRO) - target_compile_options(iconmath-support PRIVATE ${OpenACC_Fortran_OPTIONS}) + target_compile_options(iconmath-support + PRIVATE + $<$<COMPILE_LANGUAGE:Fortran>:${OpenACC_Fortran_OPTIONS}>) # This make sures that unit tests (FortUTF) compiles without the need of # passing OpenACC compile option. 
target_link_libraries(iconmath-support PRIVATE OpenACC::OpenACC_Fortran) -- GitLab From c358ed6e4e983fdf4e63d712aa32683de89ed995 Mon Sep 17 00:00:00 2001 From: Harshada Balasubramanian <harshada.balasubramanian@mpimet.mpg.de> Date: Mon, 24 Feb 2025 21:30:54 +0000 Subject: [PATCH 36/76] Added a new argument to the functions of `mo_lib_loopindices.cpp` to fix a bug regarding startindex (icon-libraries/libiconmath!32) This made the code to produce bit-identical results for both Fortran and C++ Co-authored-by: Pradipta Samanta <samanta@dkrz.de> Merged-by: Pradipta Samanta <samanta@dkrz.de> Changelog: bugfix --- src/support/CMakeLists.txt | 3 +- src/support/mo_lib_loopindices.cpp | 100 +++++++++++++++++---------- src/support/mo_lib_loopindices.hpp | 20 +++--- src/support/mo_math_utilities.F90 | 8 +-- src/support/mo_math_utilities.cpp | 16 +++-- src/support/mo_math_utilities.hpp | 8 +-- src/support/support_bindings.cpp | 50 ++++++++++++++ src/support/support_bindings.h | 33 +++++++++ test/c/test_interpolation_vector.cpp | 12 ---- test/c/test_tdma_solver.cpp | 4 +- 10 files changed, 176 insertions(+), 78 deletions(-) create mode 100644 src/support/support_bindings.cpp create mode 100644 src/support/support_bindings.h diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index e78fc16..44f60aa 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -19,7 +19,8 @@ add_library( mo_math_types.f90 mo_math_utilities.cpp mo_math_utilities.F90 - mo_random_number_generators.F90) + mo_random_number_generators.F90 + support_bindings.cpp) add_library(${PROJECT_NAME}::support ALIAS iconmath-support) diff --git a/src/support/mo_lib_loopindices.cpp b/src/support/mo_lib_loopindices.cpp index e6d9d21..30c82bd 100644 --- a/src/support/mo_lib_loopindices.cpp +++ b/src/support/mo_lib_loopindices.cpp @@ -11,47 +11,75 @@ #include <algorithm> // For std::max -extern "C" { - // get_indices_c_lib function - void get_indices_c_lib(int i_startidx_in, int 
i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, - int &i_startidx_out, int &i_endidx_out) { - if (i_blk == i_startblk) { - i_startidx_out = std::max(1, i_startidx_in); - i_endidx_out = nproma; - if (i_blk == i_endblk) { - i_endidx_out = i_endidx_in; - } - } else if (i_blk == i_endblk) { - i_startidx_out = 1; +// get_indices_c_lib function +void get_indices_c_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, + const int i_blk, const int i_startblk, const int i_endblk, + int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true) { + + //Since code is ported incrementally from Fortran to C++, depending on where the function is called from + //(either fortran or c++), the first index should be either 0 or 1. + int first_index; + if (called_from_cpp) + first_index = 0; + else + first_index = 1; + + if (i_blk == i_startblk) { + i_startidx_out = std::max(first_index, i_startidx_in); + i_endidx_out = nproma; + if (i_blk == i_endblk) { i_endidx_out = i_endidx_in; - } else { - i_startidx_out = 1; - i_endidx_out = nproma; } + } else if (i_blk == i_endblk) { + i_startidx_out = first_index; + i_endidx_out = i_endidx_in; + } else { + i_startidx_out = first_index; + i_endidx_out = nproma; } +} - // get_indices_e_lib function - void get_indices_e_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, - int &i_startidx_out, int &i_endidx_out) { - i_startidx_out = (i_blk != i_startblk) ? 1 : std::max(1, i_startidx_in); - i_endidx_out = (i_blk != i_endblk) ? nproma : i_endidx_in; - } +// get_indices_e_lib function +void get_indices_e_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, + const int i_blk, const int i_startblk, const int i_endblk, + int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true) { + + //Since code is ported incrementally from Fortran to C++, depending on where the function is called from, + //the first index should be either 0 or 1. 
+ int first_index; + if (called_from_cpp) + first_index = 0; + else + first_index = 1; + + i_startidx_out = (i_blk != i_startblk) ? first_index : std::max(first_index, i_startidx_in); + i_endidx_out = (i_blk != i_endblk) ? nproma : i_endidx_in; +} + +// get_indices_v_lib function +void get_indices_v_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, + const int i_blk, const int i_startblk, const int i_endblk, + int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true) { + + //Since code is ported incrementally from Fortran to C++, depending on where the function is called from, + //the first index should be either 0 or 1. + int first_index; + if (called_from_cpp) + first_index = 0; + else + first_index = 1; - // get_indices_v_lib function - void get_indices_v_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, - int &i_startidx_out, int &i_endidx_out) { - if (i_blk == i_startblk) { - i_startidx_out = i_startidx_in; - i_endidx_out = nproma; - if (i_blk == i_endblk) { - i_endidx_out = i_endidx_in; - } - } else if (i_blk == i_endblk) { - i_startidx_out = 1; + if (i_blk == i_startblk) { + i_startidx_out = i_startidx_in; + i_endidx_out = nproma; + if (i_blk == i_endblk) { i_endidx_out = i_endidx_in; - } else { - i_startidx_out = 1; - i_endidx_out = nproma; } + } else if (i_blk == i_endblk) { + i_startidx_out = first_index; + i_endidx_out = i_endidx_in; + } else { + i_startidx_out = first_index; + i_endidx_out = nproma; } -} +} \ No newline at end of file diff --git a/src/support/mo_lib_loopindices.hpp b/src/support/mo_lib_loopindices.hpp index 03eb977..5136c6a 100644 --- a/src/support/mo_lib_loopindices.hpp +++ b/src/support/mo_lib_loopindices.hpp @@ -8,15 +8,17 @@ // See LICENSES/ for license information // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- +#pragma once -extern "C" { - // get_indices_c_lib function - void 
get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, - int &i_startidx_out, int &i_endidx_out); +void get_indices_c_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, + const int i_blk, const int i_startblk, const int i_endblk, + int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true); - void get_indices_e_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, - int &i_startidx_out, int &i_endidx_out); +void get_indices_e_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, + const int i_blk, const int i_startblk, const int i_endblk, + int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true); - void get_indices_v_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, - int &i_startidx_out, int &i_endidx_out); -} +void get_indices_v_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, + const int i_blk, const int i_startblk, const int i_endblk, + int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true); + \ No newline at end of file diff --git a/src/support/mo_math_utilities.F90 b/src/support/mo_math_utilities.F90 index bb8e6df..56e3d25 100644 --- a/src/support/mo_math_utilities.F90 +++ b/src/support/mo_math_utilities.F90 @@ -82,7 +82,7 @@ MODULE mo_math_utilities #ifndef __USE_CPP_BINDINGS PUBLIC :: tdma_solver_vec #else - PUBLIC :: tdma_solver_vec_double + PUBLIC :: tdma_solver_vec_dp #endif PUBLIC :: check_orientation @@ -251,14 +251,14 @@ CONTAINS ! 
C++ binding for tdma_solver_vec INTERFACE - SUBROUTINE tdma_solver_vec_double(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) & - BIND(C, NAME="tdma_solver_vec_double") + SUBROUTINE tdma_solver_vec_dp(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) & + BIND(C, NAME="tdma_solver_vec_dp") IMPORT :: c_double, c_int REAL(c_double), INTENT(IN) :: a(*), b(*), c(*), d(*) INTEGER(c_int), VALUE :: slev, elev, startidx, endidx, nrows, ncols REAL(c_double), INTENT(OUT) :: varout(*) INTEGER(c_int), OPTIONAL :: opt_acc_queue - END SUBROUTINE tdma_solver_vec_double + END SUBROUTINE tdma_solver_vec_dp END INTERFACE CONTAINS diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp index 6a60f2c..5859b9d 100644 --- a/src/support/mo_math_utilities.cpp +++ b/src/support/mo_math_utilities.cpp @@ -75,11 +75,13 @@ void tdma_solver_vec(const T* a, const T* b, const T* c, const T* d, std::cout << "Elapsed time for tdma_solver_vec (Kokkos): " << elapsed_time.count() << " seconds" << std::endl; } -extern "C" { +template +void tdma_solver_vec<double>(const double* a, const double* b, const double* c, const double* d, + int slev, int elev, int startidx, int endidx, + int nrows, int ncols, double* varout); + +template +void tdma_solver_vec<float>(const float* a, const float* b, const float* c, const float* d, + int slev, int elev, int startidx, int endidx, + int nrows, int ncols, float* varout); - void tdma_solver_vec_double(const double* a, const double* b, const double* c, const double* d, - int slev, int elev, int startidx, int endidx, - int nrows, int ncols, double* varout) { - tdma_solver_vec<double>(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout); - } -} diff --git a/src/support/mo_math_utilities.hpp b/src/support/mo_math_utilities.hpp index 4ee5dc9..a3f3ba1 100644 --- a/src/support/mo_math_utilities.hpp +++ b/src/support/mo_math_utilities.hpp @@ -17,10 +17,4 @@ template <typename T> 
void tdma_solver_vec(const T* a, const T* b, const T* c, const T* d, int slev, int elev, int startidx, int endidx, - int nrows, int ncols, T* varout); - -extern "C" { - void tdma_solver_vec_double(const double* a, const double* b, const double* c, const double* d, - int slev, int elev, int startidx, int endidx, - int nrows, int ncols, double* varout); -} + int nrows, int ncols, T* varout); \ No newline at end of file diff --git a/src/support/support_bindings.cpp b/src/support/support_bindings.cpp new file mode 100644 index 0000000..664fc1e --- /dev/null +++ b/src/support/support_bindings.cpp @@ -0,0 +1,50 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include "support_bindings.h" +#include "mo_lib_loopindices.hpp" +#include "mo_math_utilities.hpp" + + +// mo_loop_indices.F90 +// C wrappers for C++ functionality +void get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out){ + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, + i_endblk, i_startidx_out, i_endidx_out, false); +} +void get_indices_e_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out){ + get_indices_e_lib(i_startidx_in, i_endidx_in, nproma,i_blk, i_startblk, i_endblk, + i_startidx_out, i_endidx_out, false); +} + +void get_indices_v_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out){ + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, + 
i_startidx_out,i_endidx_out, false); +} + +void tdma_solver_vec_dp(const double* a, const double* b, const double* c, const double* d, + int slev, int elev, int startidx, int endidx, + int nrows, int ncols, double* varout){ + + tdma_solver_vec<double>(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout); + +} + +void tdma_solver_vec_sp(const float* a, const float* b, const float* c, const float* d, + int slev, int elev, int startidx, int endidx, + int nrows, int ncols, float* varout){ + + tdma_solver_vec<float>(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout); + +} diff --git a/src/support/support_bindings.h b/src/support/support_bindings.h new file mode 100644 index 0000000..df452e4 --- /dev/null +++ b/src/support/support_bindings.h @@ -0,0 +1,33 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- +#pragma once + +extern "C" { + // mo_loop_indices.F90 + void get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out); + + void get_indices_e_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out); + + void get_indices_v_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out); + + //mo_math_utilities.F90 + void tdma_solver_vec_dp(const double* a, const double* b, const double* c, const double* d, + int slev, int elev, int startidx, int endidx, + int nrows, int ncols, double* varout); + + void tdma_solver_vec_sp(const float* a, const float* b, const float* c, const 
float* d, + int slev, int elev, int startidx, int endidx, + int nrows, int ncols, float* varout); + +} diff --git a/test/c/test_interpolation_vector.cpp b/test/c/test_interpolation_vector.cpp index a33673c..0eb5a8d 100644 --- a/test/c/test_interpolation_vector.cpp +++ b/test/c/test_interpolation_vector.cpp @@ -75,12 +75,6 @@ TEST(Edges2CellsTest, DPTest) { // Check that for each computed cell in p_u_out and p_v_out, the value is 6. // This is because for each cell, the kernel adds 6 terms of 1*1. - p_u_ref[0] = 0.0; - p_u_ref[8] = 0.0; - p_u_ref[10] = 0.0; - p_v_ref[0] = 0.0; - p_v_ref[8] = 0.0; - p_v_ref[10] = 0.0; for (size_t idx = 0; idx < p_u_out.size(); ++idx) { EXPECT_NEAR(p_u_out[idx], p_u_ref[idx], 1e-12); EXPECT_NEAR(p_v_out[idx], p_v_ref[idx], 1e-12); @@ -120,12 +114,6 @@ TEST(Edges2CellsTest, SPTest) { nproma, nlev, nblks_e, nblks_c); - p_u_ref[0] = 0.0f; - p_u_ref[8] = 0.0f; - p_u_ref[10] = 0.0f; - p_v_ref[0] = 0.0f; - p_v_ref[8] = 0.0f; - p_v_ref[10] = 0.0f; // Verify that every computed output equals 6. for (size_t idx = 0; idx < p_u_out.size(); ++idx) { EXPECT_NEAR(p_u_out[idx], p_u_ref[idx], 1e-5f); diff --git a/test/c/test_tdma_solver.cpp b/test/c/test_tdma_solver.cpp index 8f120ef..4e09ff3 100644 --- a/test/c/test_tdma_solver.cpp +++ b/test/c/test_tdma_solver.cpp @@ -51,7 +51,7 @@ protected: TEST_F(TDMASolverTestFixture, FullTest) { // Call the solver over the full range: - tdma_solver_vec_double(a.data(), b.data(), c.data(), d.data(), + tdma_solver_vec<double>(a.data(), b.data(), c.data(), d.data(), 0, n, 0, n, n, n, x.data()); // Compute the sum of all elements in the output matrix. @@ -71,7 +71,7 @@ TEST_F(TDMASolverTestFixture, FullTest) { TEST_F(TDMASolverTestFixture, PartialTest) { // Call the solver for a partial region: // For C++: slev = 1, elev = n-1, startidx = 1, endidx = n-1. 
- tdma_solver_vec_double(a.data(), b.data(), c.data(), d.data(), + tdma_solver_vec<double>(a.data(), b.data(), c.data(), d.data(), 1, n - 1, 1, n - 1, n, n, x.data()); // Compute the sum over a region -- GitLab From a5c1b54e781ff85dacad451a492cc8f44e626e44 Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: Tue, 25 Feb 2025 21:46:48 +0000 Subject: [PATCH 37/76] cpp version of mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib (icon-libraries/libiconmath!34) ## What is the new feature cpp version of mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib ## How is it implemented Kept separate file from other `mo_lib_intp_rbf` routines to avoid merge conflicts. Will be resolved by Ali and Dylan later. Co-authored-by: Pradipta Samanta <samanta@dkrz.de> Merged-by: Pradipta Samanta <samanta@dkrz.de> Changelog: feature --- src/interpolation/CMakeLists.txt | 5 +- ...b_intp_rbf-rbf_vec_interpol_vertex_lib.cpp | 197 ++++++++++++++++++ ...b_intp_rbf-rbf_vec_interpol_vertex_lib.hpp | 32 +++ ...f-rbf_vec_interpol_vertex_lib_bindings.cpp | 134 ++++++++++++ ...rbf-rbf_vec_interpol_vertex_lib_bindings.h | 54 +++++ test/c/CMakeLists.txt | 1 + test/c/test_intp_rbf.cpp | 126 +++++++++++ 7 files changed, 548 insertions(+), 1 deletion(-) create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h create mode 100644 test/c/test_intp_rbf.cpp diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index f982f3b..37c3ad0 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -14,7 +14,10 @@ add_library( mo_lib_interpolation_scalar.F90 mo_lib_interpolation_vector.F90 mo_lib_interpolation_vector.cpp - mo_lib_intp_rbf.F90) + mo_lib_intp_rbf.F90 
+ mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp + mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp +) add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation) diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp new file mode 100644 index 0000000..c9b776e --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp @@ -0,0 +1,197 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +/// Contains the only mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib() +/// +/// Separate to avoid conflicts with Ali working on rest of mo_lib_intp_rbf + +#include <type_traits> +#include <Kokkos_Core.hpp> +#include "mo_lib_loopindices.hpp" +#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" + + +constexpr int rbf_vec_dim_v = 6; + +//------------------------------------------------------------------------- +// +// +//> +/// Performs vector RBF reconstruction at triangle vertices. +/// +/// Theory described in Narcowich and Ward (Math Comp. 1994) and +/// Bonaventura and Baudisch (Mox Report n. 75). +/// It takes edge based variables as input and combines them +/// into three dimensional cartesian vectors at each vertex. +/// +/// Two templated variables in order to support mixed precision. 
+/// Intended that type_traits::is_floating_point(T,S)==TRUE +/// precision(T) >= precision(S) +template <typename T, typename S> +void rbf_vec_interpol_vertex_lib( + const T* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const T* rbf_vec_coeff_v, + S* p_u_out, + S* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + // Dimensions for the arrays. + const int nlev, const int nblks_e, const int nblks_v + ) +{ + /* +#ifdef DIM_ENABLE_GPU + if (lacc){ using MemSpace = Kokkos::CudaSpace; + } else { using MemSpace = Kokkos::HostSpace; } +#else + using MemSpace = Kokkos::HostSpace; +#endif + + */ + + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<const T****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D; + + + + // input components of velocity or horizontal vorticity vectors at edge midpoints + // dim: (nproma,nlev,nblks_e) + UnmanagedConstT3D p_e_in_view(p_e_in, nproma, nlev, nblks_e); + + // index array defining the stencil of surrounding edges for vector rbf interpolation at each triangle vertex + // (rbf_vec_dim_v,nproma,nblks_v) + UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v); + UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v); + + // coefficients are working precision array containing the coefficients used for vector rbf interpolation + // at each tringle vertex (input is normal component), + // dim: (rbf_vec_dim_v,2,nproma,nblks_v) + UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v); + + // reconstructed x-component (u) of velocity vector, + // dim: (nproma,nlev,nblks_v) + UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v); + // reconstructed y-component (v) of velocity vector, + // dim: (nproma,nlev,nblks_v) + UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v); + + // Local vars + //int jv, jk, jb; // integer over vertices, levels, and blocks, + int jb; // integer over vertices, levels, and blocks, + int i_startidx; // start index + int i_endidx; // end index + + for (jb=i_startblk; jb <= i_endblk; ++jb){ + + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, + i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + 
Kokkos::parallel_for("rbf_vec_interpol_vertex_lib", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jv) { + + // NOTE: Static indexes reduced by 1 from Fortran version + p_u_out_view(jv, jk, jb) = + ptr_coeff_view(0, 0, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + + ptr_coeff_view(1, 0, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + + ptr_coeff_view(2, 0, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + + ptr_coeff_view(3, 0, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + + ptr_coeff_view(4, 0, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + + ptr_coeff_view(5, 0, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); + p_v_out_view(jv, jk, jb) = + ptr_coeff_view(0, 1, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + + ptr_coeff_view(1, 1, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + + ptr_coeff_view(2, 1, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + + ptr_coeff_view(3, 1, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + + ptr_coeff_view(4, 1, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + + ptr_coeff_view(5, 1, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); + } + ); + } +} + +// Explicit instantiation - double precision +template +void rbf_vec_interpol_vertex_lib<double, double>( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + double* p_u_out, + double* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // 
inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + +// Explicit instantiation - single precision +template +void rbf_vec_interpol_vertex_lib<float, float>( + const float* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const float* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + +// Explicit instantiation - mixed precision +template +void rbf_vec_interpol_vertex_lib<double, float>( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp 
b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp new file mode 100644 index 0000000..c0b6f05 --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp @@ -0,0 +1,32 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#pragma once + +template <typename T, typename S> +void rbf_vec_interpol_vertex_lib( + const T* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const T* rbf_vec_coeff_v, + S* p_u_out, + S* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v // number of vertical levels, edge blocks, vertex blocks + ); \ No newline at end of file diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp new file mode 100644 index 0000000..06dc467 --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp @@ -0,0 +1,134 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See 
LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h" +#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" + +void rbf_vec_interpol_vertex_lib_dp( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + double* p_u_out, + double* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ) +{ + rbf_vec_interpol_vertex_lib<double, double>( + p_e_in, + rbf_vec_idx_v, + rbf_vec_blk_v, + rbf_vec_coeff_v, + p_u_out, + p_v_out, + i_startblk, // start_block needed for get_indices_c_lib + i_endblk, // end_block needed for get_indices_c_lib + i_startidx_in, // start_index needed for get_indices_c_lib + i_endidx_in, // end_index needed for get_indices_c_lib + slev, // vertical start level + elev, // vertical end level + nproma, // inner loop length/vector length + lacc, // if true, use Cuda mem-/exec-spaces + acc_async, // [deprecated] use async acc + nlev, nblks_e, nblks_v + ); +} + + +void rbf_vec_interpol_vertex_lib_sp( + const float* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const float* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index 
needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ) +{ + rbf_vec_interpol_vertex_lib<float, float>( + p_e_in, + rbf_vec_idx_v, + rbf_vec_blk_v, + rbf_vec_coeff_v, + p_u_out, + p_v_out, + i_startblk, // start_block needed for get_indices_c_lib + i_endblk, // end_block needed for get_indices_c_lib + i_startidx_in, // start_index needed for get_indices_c_lib + i_endidx_in, // end_index needed for get_indices_c_lib + slev, // vertical start level + elev, // vertical end level + nproma, // inner loop length/vector length + lacc, // if true, use Cuda mem-/exec-spaces + acc_async, // [deprecated] use async acc + nlev, nblks_e, nblks_v + ); + +} +// Mixed-precision binding (double inputs, float outputs). C linkage is set on the definition because the bindings header declares only the _dp and _sp entry points inside extern "C". +extern "C" void rbf_vec_interpol_vertex_lib_mixprec( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ) +{ + rbf_vec_interpol_vertex_lib<double, float>( + p_e_in, + rbf_vec_idx_v, + rbf_vec_blk_v, + rbf_vec_coeff_v, + p_u_out, + p_v_out, + i_startblk, // start_block needed for get_indices_c_lib + i_endblk, // end_block needed for 
get_indices_c_lib + i_startidx_in, // start_index needed for get_indices_c_lib + i_endidx_in, // end_index needed for get_indices_c_lib + slev, // vertical start level + elev, // vertical end level + nproma, // inner loop length/vector length + lacc, // if true, use Cuda mem-/exec-spaces + acc_async, // [deprecated] use async acc + nlev, nblks_e, nblks_v + ); + +} + diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h new file mode 100644 index 0000000..4356f88 --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h @@ -0,0 +1,54 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#pragma once + +extern "C" { + +void rbf_vec_interpol_vertex_lib_dp( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + double* p_u_out, + double* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + +void rbf_vec_interpol_vertex_lib_sp( + const float* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const 
float* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + +} \ No newline at end of file diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index 95ca08b..13c5dfe 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -25,6 +25,7 @@ set(SOURCES main.cpp test_tdma_solver.cpp test_interpolation_vector.cpp + test_intp_rbf.cpp ) # Create the test executable from your test files, including main.cpp. add_executable(iconmath_test_c ${SOURCES}) diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp new file mode 100644 index 0000000..0aa4f9b --- /dev/null +++ b/test/c/test_intp_rbf.cpp @@ -0,0 +1,126 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include <gtest/gtest.h> +#include <Kokkos_Core.hpp> +#include <vector> +#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" + +// Free-function helpers for 3D and 4D array sizes (assumed column-major) +template<typename T> +size_t num_elements_3d(int d1, int d2, int d3) { + return static_cast<size_t>(d1) * d2 * d3; +} + +template<typename T> +size_t num_elements_4d(int d1, int d2, int 
d3, int d4) { + return static_cast<size_t>(d1) * d2 * d3 * d4; +} + +// Define a helper struct that holds the two types. +template<typename InT, typename OutT> +struct MixedPrecision { + using in_type = InT; + using out_type = OutT; +}; + +// Define the list of type pairs we want to test. +typedef ::testing::Types< MixedPrecision<double, double>, + MixedPrecision<double, float>, + MixedPrecision<float, float> > MixedTypes; + +// Define a typed test fixture. +template <typename TypePair> +class RbfVecInterpolVertexMixedTestFixture : public ::testing::Test { +public: + using InType = typename TypePair::in_type; + using OutType = typename TypePair::out_type; + + // Constant dimensions. + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 4; // number of vertical levels + static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) + static constexpr int nblks_v = 2; // number of vertex blocks (for rbf arrays and outputs) + static constexpr int rbf_vec_dim = 6; // fixed dimension for rbf vector (stencil points) + + // Parameter values. + int i_startblk = 0; + int i_endblk = 1; // Test blocks [0, 1] + int i_startidx_in = 0; + int i_endidx_in = nproma - 1; // Full range: 0 .. nproma-1 + int slev = 0; + int elev = nlev - 1; // Full vertical range (0 .. nlev-1) + bool lacc = false; // Not using ACC-specific behavior. + bool acc_async = false; // No asynchronous execution. + + // Arrays stored in std::vector. 
+ std::vector<InType> p_e_in; // Dimensions: (nproma, nlev, nblks_e) + std::vector<int> rbf_vec_idx_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) + std::vector<int> rbf_vec_blk_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) + std::vector<InType> rbf_vec_coeff_v; // Dimensions: (rbf_vec_dim, 2, nproma, nblks_v) + std::vector<OutType> p_u_out; // Dimensions: (nproma, nlev, nblks_v) + std::vector<OutType> p_v_out; // Dimensions: (nproma, nlev, nblks_v) + + RbfVecInterpolVertexMixedTestFixture() { + // Allocate and initialize inputs. + p_e_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1)); + rbf_vec_idx_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 1); + rbf_vec_blk_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 0); + rbf_vec_coeff_v.resize(num_elements_4d<InType>(rbf_vec_dim, 2, nproma, nblks_v), static_cast<InType>(1)); + + // Allocate output arrays and initialize to zero. + p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); + p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); + } +}; + +TYPED_TEST_SUITE(RbfVecInterpolVertexMixedTestFixture, MixedTypes); + +TYPED_TEST(RbfVecInterpolVertexMixedTestFixture, BasicTest) { + using InType = typename TestFixture::InType; + using OutType = typename TestFixture::OutType; + + // Call the function with mixed precision. 
+ rbf_vec_interpol_vertex_lib<InType, OutType>( + this->p_e_in.data(), + this->rbf_vec_idx_v.data(), + this->rbf_vec_blk_v.data(), + this->rbf_vec_coeff_v.data(), + this->p_u_out.data(), + this->p_v_out.data(), + this->i_startblk, + this->i_endblk, + this->i_startidx_in, + this->i_endidx_in, + this->slev, + this->elev, + this->nproma, + this->lacc, + this->acc_async, + this->nlev, + RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_e, + RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_v); + + // Check the outputs only for blocks in the range [i_startblk, i_endblk]. + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = 0; level < this->nlev; ++level) { + for (int i = 0; i < this->nproma; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 6 stencil points, expect 6. + EXPECT_NEAR(this->p_u_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + EXPECT_NEAR(this->p_v_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + } + } + } +} -- GitLab From 207af46ec5ace18f1beeda09ed764713a9b2727e Mon Sep 17 00:00:00 2001 From: Harshada Balasubramanian <harshada.balasubramanian@mpimet.mpg.de> Date: Wed, 5 Mar 2025 17:08:17 +0000 Subject: [PATCH 38/76] First Kokkos version of 'mo_lib_interpolation_scalar' (icon-libraries/libiconmath!36) Co-authored-by: Pradipta Samanta <samanta@dkrz.de> Co-authored-by: Dylan Kierans <kierans@dkrz.de> Merged-by: Pradipta Samanta <samanta@dkrz.de> Changelog: feature --- src/interpolation/CMakeLists.txt | 2 + src/interpolation/interpolation_bindings.cpp | 328 ++++++++ src/interpolation/interpolation_bindings.h | 188 +++++ .../mo_lib_interpolation_scalar.cpp | 753 
++++++++++++++++++ .../mo_lib_interpolation_scalar.hpp | 90 +++ .../mo_lib_interpolation_vector.cpp | 179 ++--- .../mo_lib_interpolation_vector.hpp | 52 +- test/c/CMakeLists.txt | 5 + test/c/test_interpolation_scalar.cpp | 532 +++++++++++++ test/c/test_interpolation_vector.cpp | 75 +- 10 files changed, 2030 insertions(+), 174 deletions(-) create mode 100644 src/interpolation/interpolation_bindings.cpp create mode 100644 src/interpolation/interpolation_bindings.h create mode 100644 src/interpolation/mo_lib_interpolation_scalar.cpp create mode 100644 src/interpolation/mo_lib_interpolation_scalar.hpp create mode 100644 test/c/test_interpolation_scalar.cpp diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index 37c3ad0..1051516 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -12,11 +12,13 @@ add_library( iconmath-interpolation mo_lib_interpolation_scalar.F90 + mo_lib_interpolation_scalar.cpp mo_lib_interpolation_vector.F90 mo_lib_interpolation_vector.cpp mo_lib_intp_rbf.F90 mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp + interpolation_bindings.cpp ) add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation) diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp new file mode 100644 index 0000000..628f411 --- /dev/null +++ b/src/interpolation/interpolation_bindings.cpp @@ -0,0 +1,328 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include "interpolation_bindings.h" +#include "mo_lib_interpolation_scalar.hpp" +#include 
"mo_lib_interpolation_vector.hpp" + +// This is the binding for mo_interpolation_vector::edges2cells_vector_lib +// (wp=dp) +void edges2cells_vector_lib_dp(const double *p_vn_in, const double *p_vt_in, + const int *cell_edge_idx, + const int *cell_edge_blk, + const double *e_bln_c_u, const double *e_bln_c_v, + double *p_u_out, double *p_v_out, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, + int nblks_e, int nblks_c) { + + edges2cells_vector_lib<double>( + p_vn_in, p_vt_in, cell_edge_idx, cell_edge_blk, e_bln_c_u, e_bln_c_v, + p_u_out, p_v_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, + elev, nproma, nlev, nblks_e, nblks_c); +} + +// This is the binding for mo_interpolation_vector::edges2cells_vector_lib +// (wp=sp) +void edges2cells_vector_lib_sp(const float *p_vn_in, const float *p_vt_in, + const int *cell_edge_idx, + const int *cell_edge_blk, const float *e_bln_c_u, + const float *e_bln_c_v, float *p_u_out, + float *p_v_out, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_e, + int nblks_c) { + + edges2cells_vector_lib<float>( + p_vn_in, p_vt_in, cell_edge_idx, cell_edge_blk, e_bln_c_u, e_bln_c_v, + p_u_out, p_v_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, + elev, nproma, nlev, nblks_e, nblks_c); +} + +// This is the binding for mo_interpolation_scalar::verts2edges_scalar_lib +// (wp=dp) +void verts2edges_scalar_lib_dp( + const double *p_vertex_in, const int *edge_vertex_idx, + const int *edge_vertex_blk, const double *coeff_int, double *p_edge_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const int nlev, const int nblks_v, const int nblks_e, const bool lacc) { + + verts2edges_scalar_lib<double>(p_vertex_in, edge_vertex_idx, edge_vertex_blk, + coeff_int, p_edge_out, i_startblk, i_endblk, + 
i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_v, nblks_e, lacc); +} + +// This is the binding for mo_interpolation_scalar::verts2edges_scalar_lib +// (wp=sp) +void verts2edges_scalar_lib_sp( + const float *p_vertex_in, const int *edge_vertex_idx, + const int *edge_vertex_blk, const float *coeff_int, float *p_edge_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const int nlev, const int nblks_v, const int nblks_e, const bool lacc) { + + verts2edges_scalar_lib<float>(p_vertex_in, edge_vertex_idx, edge_vertex_blk, + coeff_int, p_edge_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_v, nblks_e, lacc); +} + +// This is the binding for mo_interpolation_scalar::cells2edges_scalar_dp_lib +void cells2edges_scalar_lib_dp( + const double *p_cell_in, const int *edge_cell_idx, const int *edge_cell_blk, + const double *coeff_int, double *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, const int *i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc) { + + cells2edges_scalar_lib<double, double>( + p_cell_in, edge_cell_idx, edge_cell_blk, coeff_int, p_edge_out, + i_startblk_in, i_endblk_in, i_startidx_in, i_endidx_in, slev, elev, + nproma, nlev, nblk_c, nblks_e, patch_id, l_limited_area, lfill_latbc, + lacc); +} + +// This is the binding for mo_interpolation_scalar::cells2edges_scalar_sp_lib +void cells2edges_scalar_lib_sp(const float *p_cell_in, const int *edge_cell_idx, + const int *edge_cell_blk, const float *coeff_int, + float *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, + const int *i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int 
nblk_c, const int nblks_e, + const int patch_id, const bool l_limited_area, + const bool lfill_latbc, const bool lacc) { + + cells2edges_scalar_lib<float, float>( + p_cell_in, edge_cell_idx, edge_cell_blk, coeff_int, p_edge_out, + i_startblk_in, i_endblk_in, i_startidx_in, i_endidx_in, slev, elev, + nproma, nlev, nblk_c, nblks_e, patch_id, l_limited_area, lfill_latbc, + lacc); +} + +// This is the binding for mo_interpolation_scalar::cells2edges_scalar_sp2dp_lib +void cells2edges_scalar_lib_sp2dp( + const float *p_cell_in, const int *edge_cell_idx, const int *edge_cell_blk, + const double *coeff_int, double *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, const int *i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc) { + + cells2edges_scalar_lib<float, double>( + p_cell_in, edge_cell_idx, edge_cell_blk, coeff_int, p_edge_out, + i_startblk_in, i_endblk_in, i_startidx_in, i_endidx_in, slev, elev, + nproma, nlev, nblk_c, nblks_e, patch_id, l_limited_area, lfill_latbc, + lacc); +} + +// This is the binding for mo_interpolation_scalar::edges2verts_scalar_lib +// (wp=dp) +void edges2verts_scalar_lib_dp( + const double *p_edge_in, const int *vert_edge_idx, const int *vert_edge_blk, + const double *v_int, double *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_v, const bool lacc) { + + edges2verts_scalar_lib<double>(p_edge_in, vert_edge_idx, vert_edge_blk, v_int, + p_vert_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_e, nblks_v, lacc); +} + +// This is the binding for mo_interpolation_scalar::edges2verts_scalar_lib +// (wp=sp) +void edges2verts_scalar_lib_sp(const float 
*p_edge_in, const int *vert_edge_idx, + const int *vert_edge_blk, const float *v_int, + float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_v, + const bool lacc) { + + edges2verts_scalar_lib<float>(p_edge_in, vert_edge_idx, vert_edge_blk, v_int, + p_vert_out, i_startblk, i_endblk, i_startidx_in, + i_endidx_in, slev, elev, nproma, nlev, nblks_e, + nblks_v, lacc); +} + +// This is the binding for mo_interpolation_scalar::edges2cells_scalar_dp_lib +void edges2cells_scalar_lib_dp(const double *p_edge_in, const int *edge_idx, + const int *edge_blk, const double *coeff_int, + double *p_cell_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_c, + const bool lacc) { + edges2cells_scalar_lib<double>(p_edge_in, edge_idx, edge_blk, coeff_int, + p_cell_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_e, nblks_c, lacc); +} + +// This is the binding for mo_interpolation_scalar::edges2cells_scalar_sp_lib +void edges2cells_scalar_lib_sp(const float *p_edge_in, const int *edge_idx, + const int *edge_blk, const float *coeff_int, + float *p_cell_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_c, + const bool lacc) { + + edges2cells_scalar_lib<float>(p_edge_in, edge_idx, edge_blk, coeff_int, + p_cell_out, i_startblk, i_endblk, i_startidx_in, + i_endidx_in, slev, elev, nproma, nlev, nblks_e, + nblks_c, lacc); +} + +// This is the binding for mo_interpolation_scalar::cells2verts_scalar_dp_lib +void cells2verts_scalar_lib_dp( + const double *p_cell_in, const int *vert_cell_idx, 
const int *vert_cell_blk, + const double *coeff_int, double *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async) { + cells2verts_scalar_lib<double, double>( + p_cell_in, vert_cell_idx, vert_cell_blk, coeff_int, p_vert_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_c, nblks_v, lacc, acc_async); +} + +// This is the binding for mo_interpolation_scalar::cells2verts_scalar_dp2sp_lib; NOTE(review): the input here is float and the output is double (template <float, double>), which reads as sp->dp rather than dp->sp - confirm the intended precision pair +void cells2verts_scalar_lib_dp2sp( + const float *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const double *coeff_int, double *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async) { + cells2verts_scalar_lib<float, double>( + p_cell_in, vert_cell_idx, vert_cell_blk, coeff_int, p_vert_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_c, nblks_v, lacc, acc_async); +} + +// This is the binding for mo_interpolation_scalar::cells2verts_scalar_sp_lib +void cells2verts_scalar_lib_sp(const float *p_cell_in, const int *vert_cell_idx, + const int *vert_cell_blk, const float *coeff_int, + float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async) { + cells2verts_scalar_lib<float, float>( + p_cell_in, vert_cell_idx, vert_cell_blk, coeff_int, p_vert_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_c, nblks_v, lacc, acc_async); +} + +// This is the binding for 
mo_interpolation_scalar::cells2verts_scalar_ri_lib +// (wp=dp, vp=dp) +void cells2verts_scalar_ri_lib_dp( + const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const double *coeff_int, double *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async) { + cells2verts_scalar_ri_lib<double, double>( + p_cell_in, vert_cell_idx, vert_cell_blk, coeff_int, p_vert_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_c, nblks_v, lacc, acc_async); +} + +// This is the binding for mo_interpolation_scalar::cells2verts_scalar_ri_lib +// (wp=dp, vp=sp) +void cells2verts_scalar_ri_lib_dp2sp( + const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const double *coeff_int, float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async) { + cells2verts_scalar_ri_lib<double, float>( + p_cell_in, vert_cell_idx, vert_cell_blk, coeff_int, p_vert_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_c, nblks_v, lacc, acc_async); +} + +// This is the binding for mo_interpolation_scalar::cells2verts_scalar_ri_lib +// (wp=sp, vp=sp) +void cells2verts_scalar_ri_lib_sp( + const float *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const float *coeff_int, float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async) { + cells2verts_scalar_ri_lib<float, float>( + p_cell_in, 
vert_cell_idx, vert_cell_blk, coeff_int, p_vert_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_c, nblks_v, lacc, acc_async); +} + +// This is the binding for mo_interpolation_scalar::verts2cells_scalar_lib +// (wp=dp) +void verts2cells_scalar_lib_dp( + const double *p_vert_in, const int *cell_index_idx, + const int *cell_vertex_blk, const double *coeff_int, double *p_cell_out, + const int nblks_c, const int npromz_c, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_v, const bool lacc) { + verts2cells_scalar_lib<double>(p_vert_in, cell_index_idx, cell_vertex_blk, + coeff_int, p_cell_out, nblks_c, npromz_c, slev, + elev, nproma, nlev, nblks_v, lacc); +} + +// This is the binding for mo_interpolation_scalar::verts2cells_scalar_lib +// (wp=sp) +void verts2cells_scalar_lib_sp( + const float *p_vert_in, const int *cell_index_idx, + const int *cell_vertex_blk, const float *coeff_int, float *p_cell_out, + const int nblks_c, const int npromz_c, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_v, const bool lacc) { + verts2cells_scalar_lib<float>(p_vert_in, cell_index_idx, cell_vertex_blk, + coeff_int, p_cell_out, nblks_c, npromz_c, slev, + elev, nproma, nlev, nblks_v, lacc); +} + +// This is the binding for mo_interpolation_scalar::cell_avg_lib (wp=dp) +void cell_avg_lib_dp(const double *psi_c, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const double *avg_coeff, + double *avg_psi_c, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_c, + const bool lacc) { + cell_avg_lib<double>(psi_c, cell_neighbor_idx, cell_neighbor_blk, avg_coeff, + avg_psi_c, i_startblk, i_endblk, i_startidx_in, + i_endidx_in, slev, elev, nproma, nlev, nblks_c, lacc); +} + +// This is the binding for mo_interpolation_scalar::cell_avg_lib (wp=sp) 
+void cell_avg_lib_sp(const float *psi_c, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const float *avg_coeff, + float *avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const bool lacc) { + cell_avg_lib<float>(psi_c, cell_neighbor_idx, cell_neighbor_blk, avg_coeff, + avg_psi_c, i_startblk, i_endblk, i_startidx_in, + i_endidx_in, slev, elev, nproma, nlev, nblks_c, lacc); +} diff --git a/src/interpolation/interpolation_bindings.h b/src/interpolation/interpolation_bindings.h new file mode 100644 index 0000000..7cb873d --- /dev/null +++ b/src/interpolation/interpolation_bindings.h @@ -0,0 +1,188 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- +#pragma once + +extern "C" { + +// mo_lib_interpolation_vector.F90 +void edges2cells_vector_lib_dp(const double *p_vn_in, const double *p_vt_in, + const int *cell_edge_idx, + const int *cell_edge_blk, + const double *e_bln_c_u, const double *e_bln_c_v, + double *p_u_out, double *p_v_out, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, + int nblks_e, int nblks_c); + +void edges2cells_vector_lib_sp(const float *p_vn_in, const float *p_vt_in, + const int *cell_edge_idx, + const int *cell_edge_blk, const float *e_bln_c_u, + const float *e_bln_c_v, float *p_u_out, + float *p_v_out, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_e, + int nblks_c); + +// mo_lib_interpolation_scalar.F90 +void 
verts2edges_scalar_lib_dp( + const double *p_vertex_in, const int *edge_vertex_idx, + const int *edge_vertex_blk, const double *coeff_int, double *p_edge_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const int nlev, const int nblks_v, const int nblks_e, const bool lacc); + +void verts2edges_scalar_lib_sp( + const float *p_vertex_in, const int *edge_vertex_idx, + const int *edge_vertex_blk, const float *coeff_int, float *p_edge_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const int nlev, const int nblks_v, const int nblks_e, const bool lacc); + +void cells2edges_scalar_lib_dp( + const double *p_cell_in, const int *edge_cell_idx, const int *edge_cell_blk, + const double *coeff_int, double *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, const int *i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc); + +void cells2edges_scalar_lib_sp(const float *p_cell_in, const int *edge_cell_idx, + const int *edge_cell_blk, const float *coeff_int, + float *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, + const int *i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, + const int patch_id, const bool l_limited_area, + const bool lfill_latbc, const bool lacc); + +void cells2edges_scalar_lib_sp2dp( + const float *p_cell_in, const int *edge_cell_idx, const int *edge_cell_blk, + const double *coeff_int, double *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, const int *i_endidx_in, + const int slev, const int elev, const int nproma, 
const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc); + +void edges2verts_scalar_lib_dp( + const double *p_edge_in, const int *vert_edge_idx, const int *vert_edge_blk, + const double *v_int, double *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_v, const bool lacc); + +void edges2verts_scalar_lib_sp(const float *p_edge_in, const int *vert_edge_idx, + const int *vert_edge_blk, const float *v_int, + float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_v, + const bool lacc); + +void edges2cells_scalar_lib_dp(const double *p_edge_in, const int *edge_idx, + const int *edge_blk, const double *coeff_int, + double *p_cell_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_c, + const bool lacc); +void edges2cells_scalar_lib_sp(const float *p_edge_in, const int *edge_idx, + const int *edge_blk, const float *coeff_int, + float *p_cell_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_c, + const bool lacc); + +///////////////////////////////////////////// + +void cells2verts_scalar_lib_dp( + const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const double *coeff_int, double *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int 
nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async); +void cells2verts_scalar_lib_dp2sp( + const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const float *coeff_int, float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async); +void cells2verts_scalar_lib_sp(const float *p_cell_in, const int *vert_cell_idx, + const int *vert_cell_blk, const float *coeff_int, + float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + +///////////////////////////////////////////// + +void cells2verts_scalar_ri_lib_dp( + const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const double *coeff_int, double *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async); + +void cells2verts_scalar_ri_lib_dp2sp( + const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const double *coeff_int, float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async); + +void cells2verts_scalar_ri_lib_sp( + const float *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const float *coeff_int, float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int 
i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async); + +///////////////////////////////////////////// + +void verts2cells_scalar_lib_dp( + const double *p_vert_in, const int *cell_index_idx, + const int *cell_vertex_blk, const double *coeff_int, double *p_cell_out, + const int nblks_c, const int npromz_c, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_v, const bool lacc); + +void verts2cells_scalar_lib_sp( + const float *p_vert_in, const int *cell_index_idx, + const int *cell_vertex_blk, const float *coeff_int, float *p_cell_out, + const int nblks_c, const int npromz_c, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_v, const bool lacc); + +///////////////////////////////////////////// + +void cell_avg_lib_dp(const double *psi_c, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const double *avg_coeff, + double *avg_psi_c, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_c, + const bool lacc); +void cell_avg_lib_sp(const float *psi_c, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const float *avg_coeff, + float *avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const bool lacc); +} diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp new file mode 100644 index 0000000..9e4e6c5 --- /dev/null +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -0,0 +1,753 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact 
information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include "mo_lib_interpolation_scalar.hpp" +#include "mo_lib_loopindices.hpp" +#include <Kokkos_Core.hpp> +#include <iostream> + +//----------------------------------------------------------------------- +// +// ! averaging and interpolation routines and +// ! routines needed to compute the coefficients therein +// +//----------------------------------------------------------------------- + +//----------------------------------------------------------------------- +//> +/// Performs average of scalar fields from vertices to velocity points. +/// +/// The coefficients are given by coeff_int. +/// +template <typename T> +void verts2edges_scalar_lib(const T *p_vertex_in, const int *edge_vertex_idx, + const int *edge_vertex_blk, const T *coeff_int, + T *p_edge_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_v, const int nblks_e, + const bool lacc) { + + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D p_vertex_in_view(p_vertex_in, nproma, nlev, nblks_v); + UnmanagedConstInt3D iidx_view(edge_vertex_idx, nproma, nblks_e, 4); + UnmanagedConstInt3D iblk_view(edge_vertex_blk, nproma, nblks_e, 4); + UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 2, nblks_e); + UnmanagedT3D p_edge_out_view(p_edge_out, nproma, nlev, nblks_e); + + for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "verts2edges_scalar", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int je) { + p_edge_out_view(je, jk, jb) = + coeff_int_view(je, 0, jb) * + p_vertex_in_view(iidx_view(je, jb, 0), jk, + iblk_view(je, jb, 0)) + + coeff_int_view(je, 1, jb) * + p_vertex_in_view(iidx_view(je, jb, 1), jk, + iblk_view(je, jb, 1)); + }); + Kokkos::fence(); + } +} + +//------------------------------------------------------------------------ +//> +/// Computes average of scalar fields from centers of triangular faces to. +/// +/// Computes average of scalar fields from centers of triangular faces to +/// velocity points. 
+/// +template <typename T, typename S> +void cells2edges_scalar_lib(const T *p_cell_in, const int *edge_cell_idx, + const int *edge_cell_blk, const S *coeff_int, + S *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, + const int *i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, + const int patch_id, const bool l_limited_area, + const bool lfill_latbc, const bool lacc) { + + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const S ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstS3D; + typedef Kokkos::View<S ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedS3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblk_c); + UnmanagedConstInt3D iidx_view(edge_cell_idx, nproma, nblks_e, 2); + UnmanagedConstInt3D iblk_view(edge_cell_blk, nproma, nblks_e, 2); + UnmanagedConstS3D coeff_int_view(coeff_int, nproma, 2, nblks_e); + UnmanagedS3D p_edge_out_view(p_edge_out, nproma, nlev, nblks_e); + + // Fill outermost nest boundary + int i_startblk, i_endblk; + if ((l_limited_area || patch_id > 0) && (lfill_latbc)) { + i_startblk = i_startblk_in[0]; + i_endblk = i_endblk_in[0]; + + for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { + + int i_startidx, i_endidx; + get_indices_e_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "cells2edges_scalar", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int je) { + if (iidx_view(je, jb, 0) >= 0 && iblk_view(je, jb, 0) >= 0) { + p_edge_out_view(je, jk, jb) = p_cell_in_view( + 
iidx_view(je, jb, 0), jk, iblk_view(je, jb, 0)); + } else if (iidx_view(je, jb, 1) >= 0 && iblk_view(je, jb, 1) >= 0) { + p_edge_out_view(je, jk, jb) = p_cell_in_view( + iidx_view(je, jb, 1), jk, iblk_view(je, jb, 1)); + } else { + std::cerr << "mo_interpolation:cells2edges_scalar_lib: error in " + "lateral boundary filling" + << std::endl; + std::exit(EXIT_FAILURE); + } + }); + Kokkos::fence(); + } + } else { + // Process the remaining grid points for which a real interpolation is + // possible + i_startblk = i_startblk_in[1]; + i_endblk = i_endblk_in[1]; + + for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { + + int i_startidx, i_endidx; + get_indices_e_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "cells2edges_scalar", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int je) { + p_edge_out_view(je, jk, jb) = + coeff_int_view(je, 0, jb) * + p_cell_in_view(iidx_view(je, jb, 0), jk, + iblk_view(je, jb, 0)) + + coeff_int_view(je, 1, jb) * + p_cell_in_view(iidx_view(je, jb, 1), jk, + iblk_view(je, jb, 1)); + }); + Kokkos::fence(); + } + } +} + +//------------------------------------------------------------------------ +//> +/// Computes average of scalar fields from velocity points to. +/// +/// Computes average of scalar fields from velocity points to +/// centers of dual faces. +/// +template <typename T> +void edges2verts_scalar_lib(const T *p_edge_in, const int *vert_edge_idx, + const int *vert_edge_blk, const T *v_int, + T *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_v, + const bool lacc) { + + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D p_edge_in_view(p_edge_in, nproma, nlev, nblks_e); + UnmanagedConstInt3D iidx_view(vert_edge_idx, nproma, nblks_v, 6); + UnmanagedConstInt3D iblk_view(vert_edge_blk, nproma, nblks_v, 6); + UnmanagedConstT3D v_int_view(v_int, nproma, 6, nblks_v); + UnmanagedT3D p_vert_out_view(p_vert_out, nproma, nlev, nblks_v); + + for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { + + int i_startidx, i_endidx; + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "edges2verts_scalar", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jv) { + p_vert_out_view(jv, jk, jb) = + v_int_view(jv, 0, jb) * p_edge_in_view(iidx_view(jv, jb, 0), jk, + iblk_view(jv, jb, 0)) + + v_int_view(jv, 1, jb) * p_edge_in_view(iidx_view(jv, jb, 1), jk, + iblk_view(jv, jb, 1)) + + v_int_view(jv, 2, jb) * p_edge_in_view(iidx_view(jv, jb, 2), jk, + iblk_view(jv, jb, 2)) + + v_int_view(jv, 3, jb) * p_edge_in_view(iidx_view(jv, jb, 3), jk, + iblk_view(jv, jb, 3)) + + v_int_view(jv, 4, jb) * p_edge_in_view(iidx_view(jv, jb, 4), jk, + iblk_view(jv, jb, 4)) + + v_int_view(jv, 5, jb) * p_edge_in_view(iidx_view(jv, jb, 5), jk, + iblk_view(jv, jb, 5)); + }); + Kokkos::fence(); + } +} + +//------------------------------------------------------------------------ +//> +/// Computes interpolation from edges to cells +/// +/// Computes interpolation of scalar fields from velocity points to +/// cell centers via given interpolation weights +/// +template <typename T> +void edges2cells_scalar_lib(const T *p_edge_in, 
const int *edge_idx, + const int *edge_blk, const T *coeff_int, + T *p_cell_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_c, + const bool lacc) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + // edge based scalar input field, dim: (nproma,nlev,nblks_e) + UnmanagedConstT3D p_edge_in_view(p_edge_in, nproma, nlev, nblks_e); + + // line indices of edges of triangles, dim: (nproma,nblks_c, 3) + UnmanagedConstInt3D iidx_view(edge_idx, nproma, nblks_c, 3); // edge_idx_view + + // block indices of edges of triangles, dim: (nproma,nblks_c, 3) + UnmanagedConstInt3D iblk_view(edge_blk, nproma, nblks_c, 3); // edge_blk_view + + // coefficients for (area weighted) interpolation, dim: + // (nproma,3-cell_type,nblks_c) + UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 3, nblks_c); + + // cell based scalar output field, dim: (nproma,nlev,nblks_c) + UnmanagedT3D p_cell_out_view(p_cell_out, nproma, nlev, nblks_c); + + int i_startidx, i_endidx; + + for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "edges2cells_scalar_lib_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + p_cell_out_view(jc, jk, jb) = + coeff_int_view(jc, 0, jb) * p_edge_in_view(iidx_view(jc, jb, 0), + jk, + iblk_view(jc, jb, 0)) + + coeff_int_view(jc, 1, jb) * p_edge_in_view(iidx_view(jc, jb, 1), + 
jk, + iblk_view(jc, jb, 1)) + + coeff_int_view(jc, 2, jb) * p_edge_in_view(iidx_view(jc, jb, 2), + jk, + iblk_view(jc, jb, 2)); + }); + Kokkos::fence(); + } +} + +//------------------------------------------------------------------------ +//> +/// Computes average of scalar fields from centers of cells to vertices. +/// +template <typename T, typename S> +void cells2verts_scalar_lib(const T *p_cell_in, const int *vert_cell_idx, + const int *vert_cell_blk, const S *coeff_int, + S *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const S ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstS3D; + typedef Kokkos::View<S ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedS3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + // cell based scalar input field, dim: (nproma,nlev,nblks_c) + UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c); + + // line indices of cells around each vertex, dim: (nproma,nblks_v, 6) + UnmanagedConstInt3D iidx_view(vert_cell_idx, nproma, nblks_v, + 6); // vert_cell_idx_view + + // block indices of cells around each vertex, dim: (nproma,nblks_v, 6) + UnmanagedConstInt3D iblk_view(vert_cell_blk, nproma, nblks_v, + 6); // vert_cell_blk_view + + // coefficients for interpolation, dim: (nproma,9-cell_type,nblks_v) + UnmanagedConstS3D coeff_int_view(coeff_int, nproma, 6, nblks_v); + + // vertex based scalar output field, dim: (nproma,nlev,nblks_c) + UnmanagedS3D p_vert_out_view(p_vert_out, nproma, nlev, nblks_c); + + int i_startidx, i_endidx; + + for (int jb = i_startblk; 
jb < i_endblk + 1; ++jb) { + + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "cells2verts_scalar_lib", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jv) { + p_vert_out_view(jv, jk, jb) = + coeff_int_view(jv, 0, jb) * p_cell_in_view(iidx_view(jv, jb, 0), + jk, + iblk_view(jv, jb, 0)) + + coeff_int_view(jv, 1, jb) * p_cell_in_view(iidx_view(jv, jb, 1), + jk, + iblk_view(jv, jb, 1)) + + coeff_int_view(jv, 2, jb) * p_cell_in_view(iidx_view(jv, jb, 2), + jk, + iblk_view(jv, jb, 2)) + + coeff_int_view(jv, 3, jb) * p_cell_in_view(iidx_view(jv, jb, 3), + jk, + iblk_view(jv, jb, 3)) + + coeff_int_view(jv, 4, jb) * p_cell_in_view(iidx_view(jv, jb, 4), + jk, + iblk_view(jv, jb, 4)) + + coeff_int_view(jv, 5, jb) * p_cell_in_view(iidx_view(jv, jb, 5), + jk, + iblk_view(jv, jb, 5)); + }); + Kokkos::fence(); + } +} + +//------------------------------------------------------------------------- +//> +/// Same as above, but provides output optionally in single precision and +/// assumes reversed index order of the output field in loop exchange mode +/// +template <typename T, typename S> +void cells2verts_scalar_ri_lib(const T *p_cell_in, const int *vert_cell_idx, + const int *vert_cell_blk, const T *coeff_int, + S *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async) { + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<S ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedS3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + // cell based scalar input field, dim: (nproma,nlev,nblks_c) + UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c); + + // line indices of cells around each vertex, dim: (nproma,nblks_v, 6) + UnmanagedConstInt3D iidx_view(vert_cell_idx, nproma, nblks_v, + 6); // vert_cell_idx_view + + // block indices of cells around each vertex, dim: (nproma,nblks_v, 6) + UnmanagedConstInt3D iblk_view(vert_cell_blk, nproma, nblks_v, + 6); // vert_cell_blk_view + + // coefficients for interpolation, dim: (nproma,9-cell_type,nblks_v) + UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 6, nblks_v); + + // vertex based scalar output field, dim: (nproma,nlev,nblks_c) +#ifdef __LOOP_EXCHANGE + UnmanagedS3D p_vert_out_view(p_vert_out, nproma, nlev, nblks_c); +#else + UnmanagedS3D p_vert_out_view(p_vert_out, nlev, nproma, nblks_c); +#endif + + int i_startidx, i_endidx; + + for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { + + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "cells2verts_scalar_ri_lib", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jv) { + +#ifdef __LOOP_EXCHANGE + p_vert_out_view(jv, jk, jb) = +#else + p_vert_out_view(jk, jv, jb) = +#endif + coeff_int_view(jv, 0, jb) * p_cell_in_view(iidx_view(jv, jb, 0), + jk, + iblk_view(jv, jb, 0)) + + coeff_int_view(jv, 1, jb) * p_cell_in_view(iidx_view(jv, jb, 1), + jk, + iblk_view(jv, jb, 1)) + + coeff_int_view(jv, 2, jb) * p_cell_in_view(iidx_view(jv, jb, 2), + jk, + iblk_view(jv, jb, 2)) + + coeff_int_view(jv, 3, jb) * 
p_cell_in_view(iidx_view(jv, jb, 3), + jk, + iblk_view(jv, jb, 3)) + + coeff_int_view(jv, 4, jb) * p_cell_in_view(iidx_view(jv, jb, 4), + jk, + iblk_view(jv, jb, 4)) + + coeff_int_view(jv, 5, jb) * p_cell_in_view(iidx_view(jv, jb, 5), + jk, + iblk_view(jv, jb, 5)); + }); + Kokkos::fence(); + } +} + +//------------------------------------------------------------------------- +//> +/// Computes average of scalar fields from vertices to centers of cells. +/// +template <typename T> +void verts2cells_scalar_lib(const T *p_vert_in, const int *cell_index_idx, + const int *cell_vertex_blk, const T *coeff_int, + T *p_cell_out, const int nblks_c, + const int npromz_c, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_v, + const bool lacc) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + // cell based scalar input field, dim: (nproma,nlev,nblks_v) + UnmanagedConstT3D p_vert_in_view(p_vert_in, nproma, nlev, nblks_v); + + // line indices of vertices of triangles, dim: (nproma,nblks_c, 3) + UnmanagedConstInt3D iidx_view(cell_index_idx, nproma, nblks_c, + 3); // cell_vertex_idx + + // block indices of vertices of triangles, dim: (nproma,nblks_c, 3) + UnmanagedConstInt3D iblk_view(cell_vertex_blk, nproma, nblks_c, + 3); // cell_vertex_blk + + // coefficients for interpolation, dim: (nproma, 3, nblks_c) + UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 3, nblks_c); + + // vertex based scalar output field, dim: (nproma,nlev,nblks_c) + UnmanagedT3D p_cell_out_view(p_cell_out, nproma, nlev, nblks_c); + + for (int jb = 0; jb < nblks_c; ++jb) { + + int nlen; + if (jb != nblks_c) { + nlen = nproma; + } else { + nlen = npromz_c; + } + + 
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, 0}, + {elev + 1, nlen}); + + Kokkos::parallel_for( + "cell_avg_lib_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + p_cell_out_view(jc, jk, jb) = + coeff_int_view(jc, 0, jb) * p_vert_in_view(iidx_view(jc, jb, 0), + jk, + iblk_view(jc, jb, 0)) + + coeff_int_view(jc, 1, jb) * p_vert_in_view(iidx_view(jc, jb, 1), + jk, + iblk_view(jc, jb, 1)) + + coeff_int_view(jc, 2, jb) * p_vert_in_view(iidx_view(jc, jb, 2), + jk, + iblk_view(jc, jb, 2)); + }); + Kokkos::fence(); + } +} + +//------------------------------------------------------------------------- +//> +/// Computes the average of a cell-based variable. +/// +/// Computes the average of a cell-based variable +/// over its original location and the neighboring triangles. +/// Version with variable weighting coefficients, computed such that +/// linear horizontal gradients are not aliased into a checkerboard noise +/// input: lives on centers of triangles +/// output: lives on centers of triangles +/// +template <typename T> +void cell_avg_lib(const T *psi_c, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const T *avg_coeff, + T *avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const bool lacc) { + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + // cell based variable before averaging, dim: (nproma,nlev,nblks_c) + UnmanagedConstT3D psi_c_view(psi_c, nproma, nlev, nblks_c); + // line indices of triangles next to each cell, dim: (nproma,nblks_c, 3) + UnmanagedConstInt3D iidx_view(cell_neighbor_idx, nproma, nblks_c, + 3); // cell_neighbour_idx + // block indices of triangles next to each cell, dim: (nproma,nblks_c, 3) + UnmanagedConstInt3D iblk_view(cell_neighbor_blk, nproma, nblks_c, + 3); // cell_neighbour_blk + // averaging coefficients, dim: (nproma,nlev,nblks_c) + UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma, nlev, nblks_c); + + // cell based variable after averaging, dim: (nproma,nlev,nblks_c) + UnmanagedT3D avg_psi_c_view(avg_psi_c, nproma, nlev, nblks_c); + + int i_startidx, i_endidx; + + for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "cell_avg_lib_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + // calculate the weighted average + + avg_psi_c_view(jc, jk, jb) = + psi_c_view(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + + psi_c_view(iidx_view(jc, jb, 0), jk, iblk_view(jc, jb, 0)) * + avg_coeff_view(jc, 1, jb) + + psi_c_view(iidx_view(jc, jb, 1), jk, iblk_view(jc, jb, 1)) * + avg_coeff_view(jc, 2, jb) + + psi_c_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2)) * + avg_coeff_view(jc, 3, jb); + }); + Kokkos::fence(); + } +} + +//----------------------------------------------------------------------- +// +// Explicit Instantiations +// 
+//----------------------------------------------------------------------- + +template void verts2edges_scalar_lib<double>( + const double *p_vertex_in, const int *edge_vertex_idx, + const int *edge_vertex_blk, const double *coeff_int, double *p_edge_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const int nlev, const int nblks_v, const int nblks_e, const bool lacc); + +template void verts2edges_scalar_lib<float>( + const float *p_vertex_in, const int *edge_vertex_idx, + const int *edge_vertex_blk, const float *coeff_int, float *p_edge_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const int nlev, const int nblks_v, const int nblks_e, const bool lacc); + +template void cells2edges_scalar_lib<double, double>( + const double *p_cell_in, const int *edge_cell_idx, const int *edge_cell_blk, + const double *coeff_int, double *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, const int *i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc); + +template void cells2edges_scalar_lib<float, float>( + const float *p_cell_in, const int *edge_cell_idx, const int *edge_cell_blk, + const float *coeff_int, float *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, const int *i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc); + +// sp2dp +template void cells2edges_scalar_lib<float, double>( + const float *p_cell_in, const int *edge_cell_idx, const int *edge_cell_blk, + const double 
*coeff_int, double *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, const int *i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc); + +template void edges2verts_scalar_lib<double>( + const double *p_edge_in, const int *vert_edge_idx, const int *vert_edge_blk, + const double *v_int, double *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_v, const bool lacc); + +template void edges2verts_scalar_lib<float>( + const float *p_edge_in, const int *vert_edge_idx, const int *vert_edge_blk, + const float *v_int, float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_v, const bool lacc); + +template void edges2cells_scalar_lib<double>( + const double *p_edge_in, const int *edge_idx, const int *edge_blk, + const double *coeff_int, double *p_cell_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_c, const bool lacc); + +template void edges2cells_scalar_lib<float>( + const float *p_edge_in, const int *edge_idx, const int *edge_blk, + const float *coeff_int, float *p_cell_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_c, const bool lacc); + +template void cells2verts_scalar_lib<double, double>( + const double *p_cell_in, const int *vert_cell_idx, const int 
*vert_cell_blk, + const double *coeff_int, double *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async); + +template void cells2verts_scalar_lib<float, double>( + const float *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const double *coeff_int, double *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async); + +template void cells2verts_scalar_lib<float, float>( + const float *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const float *coeff_int, float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async); + +template void cells2verts_scalar_ri_lib<double, double>( + const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const double *coeff_int, double *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async); + +template void cells2verts_scalar_ri_lib<double, float>( + const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const double *coeff_int, float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool 
acc_async); + +template void cells2verts_scalar_ri_lib<float, float>( + const float *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const float *coeff_int, float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async); + +template void verts2cells_scalar_lib<double>( + const double *p_vert_in, const int *cell_index_idx, + const int *cell_vertex_blk, const double *coeff_int, double *p_cell_out, + const int nblks_c, const int npromz_c, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_v, const bool lacc); + +template void verts2cells_scalar_lib<float>( + const float *p_vert_in, const int *cell_index_idx, + const int *cell_vertex_blk, const float *coeff_int, float *p_cell_out, + const int nblks_c, const int npromz_c, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_v, const bool lacc); + +template void cell_avg_lib<double>( + const double *psi_c, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const double *avg_coeff, double *avg_psi_c, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const bool lacc); + +template void +cell_avg_lib<float>(const float *psi_c, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const float *avg_coeff, + float *avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const bool lacc); diff --git a/src/interpolation/mo_lib_interpolation_scalar.hpp b/src/interpolation/mo_lib_interpolation_scalar.hpp new file mode 100644 index 0000000..8c8d2de --- /dev/null +++ 
b/src/interpolation/mo_lib_interpolation_scalar.hpp @@ -0,0 +1,95 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#pragma once + +// Declarations of the templated scalar interpolation routines. Each takes +// raw pointers to flat input, index/block, coefficient and output arrays, +// followed by the loop bounds and array extents as plain integers. +// NOTE(review): only declarations are visible here; the definitions and +// explicit instantiations presumably live in the matching .cpp -- confirm. + +template <typename T> +void verts2edges_scalar_lib(const T *p_vertex_in, const int *edge_vertex_idx, + const int *edge_vertex_blk, const T *coeff_int, + T *p_edge_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_v, const int nblks_e, + const bool lacc); + +template <typename T, typename S> +void cells2edges_scalar_lib(const T *p_cell_in, const int *edge_cell_idx, + const int *edge_cell_blk, const S *coeff_int, + S *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, + const int *i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, + const int patch_id, const bool l_limited_area, + const bool lfill_latbc, const bool lacc); + +template <typename T> +void edges2verts_scalar_lib(const T *p_edge_in, const int *vert_edge_idx, + const int *vert_edge_blk, const T *v_int, + T *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_v, + const bool lacc); + +template <typename T> +void edges2cells_scalar_lib(const T *p_edge_in, const int *edge_idx, + const int *edge_blk, const T *coeff_int, + T *p_cell_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + 
const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_c, + const bool lacc); + +template <typename T, typename S> +void cells2verts_scalar_lib(const T *p_cell_in, const int *vert_cell_idx, + const int *vert_cell_blk, const S *coeff_int, + S *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + +template <typename T, typename S> +void cells2verts_scalar_ri_lib(const T *p_cell_in, const int *vert_cell_idx, + const int *vert_cell_blk, const T *coeff_int, + S *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + +template <typename T> +void verts2cells_scalar_lib(const T *p_vert_in, const int *cell_index_idx, + const int *cell_vertex_blk, const T *coeff_int, + T *p_cell_out, const int nblks_c, + const int npromz_c, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_v, + const bool lacc); + +template <typename T> +void cell_avg_lib(const T *psi_c, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const T *avg_coeff, + T *avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const bool lacc); diff --git a/src/interpolation/mo_lib_interpolation_vector.cpp b/src/interpolation/mo_lib_interpolation_vector.cpp index 00a914a..8e6a28e 100644 --- a/src/interpolation/mo_lib_interpolation_vector.cpp +++ b/src/interpolation/mo_lib_interpolation_vector.cpp @@ -9,34 +9,27 @@ // SPDX-License-Identifier: BSD-3-Clause // 
--------------------------------------------------------------- -#include "mo_lib_loopindices.hpp" #include "mo_lib_interpolation_vector.hpp" -// The templated C++ function using Kokkos. -// Raw pointer arguments are wrapped into unmanaged Kokkos::Views. -// Note: The dimensions below must match the Fortran arrays. -// - p_vn_in and p_vt_in: dimensions [nproma, nlev, nblks_e] -// - cell_edge_idx and cell_edge_blk: dimensions [nproma, nblks_c, 3] -// - e_bln_c_u and e_bln_c_v: dimensions [nproma, 6, nblks_c] -// - p_u_out and p_v_out: dimensions [nproma, nlev, nblks_c] template <typename T> -void edges2cells_vector_lib( - const T* p_vn_in, const T* p_vt_in, - const int* cell_edge_idx, const int* cell_edge_blk, - const T* e_bln_c_u, const T* e_bln_c_v, - T* p_u_out, T* p_v_out, - // Additional integer parameters. - int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, - int slev, int elev, - int nproma, - // Dimensions for the arrays. - int nlev, int nblks_e, int nblks_c) -{ +void edges2cells_vector_lib(const T *p_vn_in, const T *p_vt_in, + const int *cell_edge_idx, const int *cell_edge_blk, + const T *e_bln_c_u, const T *e_bln_c_v, T *p_u_out, + T *p_v_out, + // Additional integer parameters. + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + // Dimensions for the arrays. + int nlev, int nblks_e, int nblks_c) { + // Wrap raw pointers in unmanaged Kokkos Views. 
- typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e); UnmanagedConstT3D p_vt_in_view(p_vt_in, nproma, nlev, nblks_e); @@ -54,88 +47,76 @@ void edges2cells_vector_lib( for (int jb = i_startblk; jb <= i_endblk; ++jb) { // Call get_indices_c_lib to get inner loop indices for block jb. int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, - jb, i_startblk, i_endblk, - i_startidx, i_endidx); + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( {slev, i_startidx}, {elev + 1, i_endidx + 1}); - Kokkos::parallel_for("edges2cells_inner", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - // Compute the bilinear interpolation for cell (jc, jk, jb). 
- p_u_out_view(jc, jk, jb) = - e_bln_c_u_view(jc, 0, jb) * - p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + - e_bln_c_u_view(jc, 1, jb) * - p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + - e_bln_c_u_view(jc, 2, jb) * - p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + - e_bln_c_u_view(jc, 3, jb) * - p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + - e_bln_c_u_view(jc, 4, jb) * - p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1) + - e_bln_c_u_view(jc, 5, jb) * - p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1); + Kokkos::parallel_for( + "edges2cells_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + // Compute the bilinear interpolation for cell (jc, jk, jb). + p_u_out_view(jc, jk, jb) = + e_bln_c_u_view(jc, 0, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, + cell_edge_blk_view(jc, jb, 0) - 1) + + e_bln_c_u_view(jc, 1, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, + cell_edge_blk_view(jc, jb, 0) - 1) + + e_bln_c_u_view(jc, 2, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, + cell_edge_blk_view(jc, jb, 1) - 1) + + e_bln_c_u_view(jc, 3, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, + cell_edge_blk_view(jc, jb, 1) - 1) + + e_bln_c_u_view(jc, 4, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, + cell_edge_blk_view(jc, jb, 2) - 1) + + e_bln_c_u_view(jc, 5, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, + cell_edge_blk_view(jc, jb, 2) - 1); - p_v_out_view(jc, jk, jb) = - e_bln_c_v_view(jc, 0, jb) * - p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + - e_bln_c_v_view(jc, 1, jb) * - p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + - e_bln_c_v_view(jc, 2, jb) * - 
p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + - e_bln_c_v_view(jc, 3, jb) * - p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + - e_bln_c_v_view(jc, 4, jb) * - p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1) + - e_bln_c_v_view(jc, 5, jb) * - p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1); - }); + p_v_out_view(jc, jk, jb) = + e_bln_c_v_view(jc, 0, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, + cell_edge_blk_view(jc, jb, 0) - 1) + + e_bln_c_v_view(jc, 1, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, + cell_edge_blk_view(jc, jb, 0) - 1) + + e_bln_c_v_view(jc, 2, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, + cell_edge_blk_view(jc, jb, 1) - 1) + + e_bln_c_v_view(jc, 3, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, + cell_edge_blk_view(jc, jb, 1) - 1) + + e_bln_c_v_view(jc, 4, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, + cell_edge_blk_view(jc, jb, 2) - 1) + + e_bln_c_v_view(jc, 5, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, + cell_edge_blk_view(jc, jb, 2) - 1); + }); // Optionally fence after each block if required. 
Kokkos::fence(); } } -extern "C" void edges2cells_vector_lib_dp( - const double* p_vn_in, const double* p_vt_in, - const int* cell_edge_idx, const int* cell_edge_blk, - const double* e_bln_c_u, const double* e_bln_c_v, - double* p_u_out, double* p_v_out, - int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, - int slev, int elev, - int nproma, - int nlev, int nblks_e, int nblks_c) -{ - edges2cells_vector_lib<double>(p_vn_in, p_vt_in, - cell_edge_idx, cell_edge_blk, - e_bln_c_u, e_bln_c_v, - p_u_out, p_v_out, - i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, - nproma, - nlev, nblks_e, nblks_c); -} +template void edges2cells_vector_lib<double>( + const double *p_vn_in, const double *p_vt_in, const int *cell_edge_idx, + const int *cell_edge_blk, const double *e_bln_c_u, const double *e_bln_c_v, + double *p_u_out, double *p_v_out, + // Additional integer parameters. + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, + // Dimensions for the arrays. + int nlev, int nblks_e, int nblks_c); -extern "C" void edges2cells_vector_lib_sp( - const float* p_vn_in, const float* p_vt_in, - const int* cell_edge_idx, const int* cell_edge_blk, - const float* e_bln_c_u, const float* e_bln_c_v, - float* p_u_out, float* p_v_out, - int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, - int slev, int elev, - int nproma, - int nlev, int nblks_e, int nblks_c) -{ - edges2cells_vector_lib<float>(p_vn_in, p_vt_in, - cell_edge_idx, cell_edge_blk, - e_bln_c_u, e_bln_c_v, - p_u_out, p_v_out, - i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, - nproma, - nlev, nblks_e, nblks_c); -} +template void edges2cells_vector_lib<float>( + const float *p_vn_in, const float *p_vt_in, const int *cell_edge_idx, + const int *cell_edge_blk, const float *e_bln_c_u, const float *e_bln_c_v, + float *p_u_out, float *p_v_out, + // Additional integer parameters. 
+ int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, + // Dimensions for the arrays. + int nlev, int nblks_e, int nblks_c); \ No newline at end of file diff --git a/src/interpolation/mo_lib_interpolation_vector.hpp b/src/interpolation/mo_lib_interpolation_vector.hpp index 0d19b24..9186997 100644 --- a/src/interpolation/mo_lib_interpolation_vector.hpp +++ b/src/interpolation/mo_lib_interpolation_vector.hpp @@ -8,42 +8,26 @@ // See LICENSES/ for license information // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- +#pragma once +#include "mo_lib_loopindices.hpp" #include <Kokkos_Core.hpp> #include <vector> +// The templated C++ function using Kokkos. +// Raw pointer arguments are wrapped into unmanaged Kokkos::Views. +// Note: The dimensions below must match the Fortran arrays. +// - p_vn_in and p_vt_in: dimensions [nproma, nlev, nblks_e] +// - cell_edge_idx and cell_edge_blk: dimensions [nproma, nblks_c, 3] +// - e_bln_c_u and e_bln_c_v: dimensions [nproma, 6, nblks_c] +// - p_u_out and p_v_out: dimensions [nproma, nlev, nblks_c] template <typename T> -void edges2cells_vector_lib( - const T* p_vn_in, const T* p_vt_in, - const int* cell_edge_idx, const int* cell_edge_blk, - const T* e_bln_c_u, const T* e_bln_c_v, - T* p_u_out, T* p_v_out, - // Additional integer parameters. - int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, - int slev, int elev, - int nproma, - // Dimensions for the arrays. 
- int nlev, int nblks_e, int nblks_c); - -extern "C" void edges2cells_vector_lib_dp( - const double* p_vn_in, const double* p_vt_in, - const int* cell_edge_idx, const int* cell_edge_blk, - const double* e_bln_c_u, const double* e_bln_c_v, - double* p_u_out, double* p_v_out, - int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, - int slev, int elev, - int nproma, - int nlev, int nblks_e, int nblks_c); - -extern "C" void edges2cells_vector_lib_sp( - const float* p_vn_in, const float* p_vt_in, - const int* cell_edge_idx, const int* cell_edge_blk, - const float* e_bln_c_u, const float* e_bln_c_v, - float* p_u_out, float* p_v_out, - int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, - int slev, int elev, - int nproma, - int nlev, int nblks_e, int nblks_c); +void edges2cells_vector_lib(const T *p_vn_in, const T *p_vt_in, + const int *cell_edge_idx, const int *cell_edge_blk, + const T *e_bln_c_u, const T *e_bln_c_v, T *p_u_out, + T *p_v_out, + // Additional integer parameters. + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + // Dimensions for the arrays. + int nlev, int nblks_e, int nblks_c); \ No newline at end of file diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index 13c5dfe..c9320cb 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -21,11 +21,16 @@ FetchContent_MakeAvailable(googletest) # Find Kokkos (or use your existing Kokkos installation) # find_package(Kokkos REQUIRED) +if(IM_ENABLE_LOOP_EXCHANGE) + target_compile_definitions(iconmath-interpolation PRIVATE __LOOP_EXCHANGE) +endif() + set(SOURCES main.cpp test_tdma_solver.cpp test_interpolation_vector.cpp test_intp_rbf.cpp + test_interpolation_scalar.cpp ) # Create the test executable from your test files, including main.cpp. 
add_executable(iconmath_test_c ${SOURCES}) diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp new file mode 100644 index 0000000..0ee7fa3 --- /dev/null +++ b/test/c/test_interpolation_scalar.cpp @@ -0,0 +1,532 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include "mo_lib_interpolation_scalar.hpp" +#include <Kokkos_Core.hpp> +#include <gtest/gtest.h> +#include <vector> + +// Element counts of 3D/4D arrays; the type parameter only tags the call site +template <typename T> size_t num_elements_3d(int d1, int d2, int d3) { + return static_cast<size_t>(d1) * d2 * d3; +} + +template <typename T> size_t num_elements_4d(int d1, int d2, int d3, int d4) { + return static_cast<size_t>(d1) * d2 * d3 * d4; +} + +// Define a helper struct that holds the two types. +template <typename InT, typename OutT> struct MixedPrecision { + using in_type = InT; + using out_type = OutT; +}; + +// Define the list of type pairs we want to test. +typedef ::testing::Types<MixedPrecision<double, double>, + MixedPrecision<double, float>, + MixedPrecision<float, float>> + MixedTypes; + +typedef ::testing::Types<MixedPrecision<double, double>, + MixedPrecision<float, double>, + MixedPrecision<float, float>> + MixedTypesSP2DP; + +// Shared dimensions for all routines and classes +class interp_dimensions { +public: + // Constant dimensions. 
+ static constexpr int nproma = 16; // inner loop length + static constexpr int nlev = 7; // number of vertical levels + static constexpr int nblks_c = 2; // number of cell blocks + static constexpr int nblks_e = 2; // number of edge blocks + static constexpr int nblks_v = + 2; // number of vertex blocks + + // Parameter values. + const int i_startblk = 0; + const int i_endblk = 1; // Test blocks [0, 1] + const int i_startidx = 2; + const int i_endidx = nproma - 3; // Partial range: 2 .. nproma-3 + const int slev = 1; + const int elev = nlev - 1; // Partial vertical range (1 .. nlev-1) + const bool lacc = false; // Not using ACC-specific behavior. + const bool acc_async = false; // No asynchronous execution. +}; + +template <typename T> +class InterpolationScalarTypedTestFixture : public ::testing::Test, + public interp_dimensions { +public: + // Arrays used for verts2edges + std::vector<T> p_vertex_in; // Dimensions: (nproma, nlev, nblks_v) + std::vector<int> edge_vertex_idx; // Dimensions: (nproma, nblks_e, 4) + std::vector<int> edge_vertex_blk; // Dimensions: (nproma, nblks_e, 4) + std::vector<T> coeff_int_edges; // Dimensions: (nproma, 2, nblks_e) + std::vector<T> p_edge_out; // Dimensions: (nproma, nlev, nblks_e) + + // Arrays used for edges2verts + std::vector<T> p_edge_in; // Dimensions: (nproma, nlev, nblks_e) + std::vector<int> edge_vert_idx; // Dimensions: (nproma, nblks_e, 6) + std::vector<int> edge_vert_blk; // Dimensions: (nproma, nblks_e, 6) + std::vector<T> v_int; // Dimensions: (nproma, 6, nblks_v) + std::vector<T> p_vert_out; // Dimensions: (nproma, nlev, nblks_v) + + // Arrays used for edges2cells + // std::vector<T> p_edge_in; // Dimensions: (nproma, nlev, nblks_e) + std::vector<int> edge_idx; // Dimensions: (nproma, nblks_c, 3) + std::vector<int> edge_blk; // Dimensions: (nproma, nblks_c, 3) + std::vector<T> coeff_int_cells; // Dimensions: (nproma, 3, nblks_c) + std::vector<T> p_cell_out; // Dimensions: 
(nproma, nlev, nblks_c) + + // Arrays used for verts2cells + std::vector<T> p_vert_in; // Dimensions: (nproma, nlev, nblks_v) + std::vector<int> cell_index_idx; // Dimensions: (nproma, nblks_c, 3) + std::vector<int> cell_index_blk; // Dimensions: (nproma, nblks_c, 3) + + // Arrays used for avg_lib + std::vector<T> psi_c; // Dimensions: (nproma, nlev, nblks_c) + std::vector<int> cell_neighbor_idx; // Dimensions: (nproma, nblks_c, 3) + std::vector<int> cell_neighbor_blk; // Dimensions: (nproma, nblks_c, 3) + std::vector<T> avg_coeff; // Allocated as (nproma, nlev, nblks_c); TODO confirm + std::vector<T> avg_psi_c; // Dimensions: (nproma, nlev, nblks_c) + + const int cell_type = 6; + const int npromz_c = nproma; // NOTE(review): presumably last-block length; keep <= nproma + + InterpolationScalarTypedTestFixture() { + // Allocate and initialize arrays needed for verts2edges + p_vertex_in.resize(num_elements_3d<T>(nproma, nlev, nblks_v), + static_cast<T>(1)); + edge_vertex_idx.resize(num_elements_3d<int>(nproma, nblks_e, 4), 1); + edge_vertex_blk.resize(num_elements_3d<int>(nproma, nblks_e, 4), 0); + coeff_int_edges.resize(num_elements_3d<T>(nproma, 2, nblks_e), + static_cast<T>(1)); + + p_edge_out.resize(num_elements_3d<T>(nproma, nlev, nblks_e), + static_cast<T>(0)); + + // Allocate & Initialize arrays needed for edges2verts + p_edge_in.resize(num_elements_3d<T>(nproma, nlev, nblks_e), + static_cast<T>(1)); + edge_vert_idx.resize(num_elements_3d<int>(nproma, nblks_e, 6), 1); + edge_vert_blk.resize(num_elements_3d<int>(nproma, nblks_e, 6), 0); + v_int.resize(num_elements_3d<T>(nproma, 6, nblks_v), static_cast<T>(1)); + + p_vert_out.resize(num_elements_3d<T>(nproma, nlev, nblks_v), + static_cast<T>(0)); + + // Allocate & Initialize arrays needed for edges2cells + edge_idx.resize(num_elements_3d<int>(nproma, nblks_c, 3), 1); + edge_blk.resize(num_elements_3d<int>(nproma, nblks_c, 3), 0); + coeff_int_cells.resize(num_elements_3d<T>(nproma, 3, nblks_c), + static_cast<T>(1)); + + p_cell_out.resize(num_elements_3d<T>(nproma, nlev, nblks_c), + 
static_cast<T>(0)); + + // Allocate and initialize arrays needed for verts2cells + p_vert_in.resize(num_elements_3d<T>(nproma, nlev, nblks_v), + static_cast<T>(1)); + cell_index_idx.resize(num_elements_3d<int>(nproma, nblks_c, 3), 1); + cell_index_blk.resize(num_elements_3d<int>(nproma, nblks_c, 3), 0); + + // Allocate and initialize arrays needed for avg_lib + psi_c.resize(num_elements_3d<T>(nproma, nlev, nblks_c), static_cast<T>(1)); + cell_neighbor_idx.resize(num_elements_3d<int>(nproma, nblks_c, 3), 1); + cell_neighbor_blk.resize(num_elements_3d<int>(nproma, nblks_c, 3), 0); + avg_coeff.resize(num_elements_3d<T>(nproma, nlev, nblks_c), + static_cast<T>(1)); + + // Allocate output arrays and initialize to zero. + avg_psi_c.resize(num_elements_3d<T>(nproma, nlev, nblks_c), + static_cast<T>(0)); + } +}; + +typedef ::testing::Types<float, double> SingleType; + +TYPED_TEST_SUITE(InterpolationScalarTypedTestFixture, SingleType); + +//////////////////////////////////////////////////////////////////////////////// +// +// ! 
verts2edges +// +//////////////////////////////////////////////////////////////////////////////// + +TYPED_TEST(InterpolationScalarTypedTestFixture, Verts2Edges) { + + verts2edges_scalar_lib<TypeParam>( + this->p_vertex_in.data(), this->edge_vertex_idx.data(), + this->edge_vertex_blk.data(), this->coeff_int_edges.data(), + this->p_edge_out.data(), this->i_startblk, this->i_endblk, + this->i_startidx, this->i_endidx, this->slev, this->elev, this->nproma, + this->nlev, this->nblks_v, this->nblks_e, this->lacc); + + // Check the outputs for blocks [i_startblk, i_endblk], levels [slev, elev) + // and indices [i_startidx, i_endidx). NOTE(review): upper bounds excluded? + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev; level < this->elev; ++level) { + for (int i = this->i_startidx; i < this->i_endidx; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = + i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 2 stencil points, + // expect 2. + EXPECT_NEAR(this->p_edge_out[idx], static_cast<TypeParam>(2), + static_cast<TypeParam>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +// +// ! 
edges2verts +// +//////////////////////////////////////////////////////////////////////////////// + +TYPED_TEST(InterpolationScalarTypedTestFixture, Edges2Verts) { + + edges2verts_scalar_lib<TypeParam>( + this->p_edge_in.data(), this->edge_vert_idx.data(), + this->edge_vert_blk.data(), this->v_int.data(), this->p_vert_out.data(), + this->i_startblk, this->i_endblk, this->i_startidx, this->i_endidx, + this->slev, this->elev, this->nproma, this->nlev, this->nblks_e, + this->nblks_v, this->lacc); + + // Check the outputs for blocks [i_startblk, i_endblk], levels [slev, elev) + // and indices [i_startidx, i_endidx). NOTE(review): upper bounds excluded? + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev; level < this->elev; ++level) { + for (int i = this->i_startidx; i < this->i_endidx; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = + i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 6 stencil points, + // expect 6. + EXPECT_NEAR(this->p_vert_out[idx], static_cast<TypeParam>(6), + static_cast<TypeParam>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +// +// ! 
edges2cells +// +//////////////////////////////////////////////////////////////////////////////// + +TYPED_TEST(InterpolationScalarTypedTestFixture, Edges2Cells) { + + edges2cells_scalar_lib<TypeParam>( + this->p_edge_in.data(), this->edge_idx.data(), this->edge_blk.data(), + this->coeff_int_cells.data(), this->p_cell_out.data(), this->i_startblk, + this->i_endblk, this->i_startidx, this->i_endidx, this->slev, this->elev, + this->nproma, this->nlev, this->nblks_e, this->nblks_c, this->lacc); + + // Check the outputs for blocks [i_startblk, i_endblk], levels [slev, elev) + // and indices [i_startidx, i_endidx). NOTE(review): upper bounds excluded? + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev; level < this->elev; ++level) { + for (int i = this->i_startidx; i < this->i_endidx; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = + i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 3 stencil points, + // expect 3. 
+ EXPECT_NEAR(this->p_cell_out[idx], static_cast<TypeParam>(3), + static_cast<TypeParam>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; + } + } + } +} + +TYPED_TEST(InterpolationScalarTypedTestFixture, Verts2Cells) { + + verts2cells_scalar_lib<TypeParam>( + this->p_vert_in.data(), this->cell_index_idx.data(), + this->cell_index_blk.data(), this->coeff_int_cells.data(), + this->p_cell_out.data(), this->nblks_c, this->npromz_c, this->slev, + this->elev, this->nproma, this->nlev, this->nblks_v, this->lacc); + + // verts2cells is given nblks_c/npromz_c instead of block and index bounds; + // the checks still cover only the fixture's block/level/index sub-range. + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev; level < this->elev; ++level) { + for (int i = this->i_startidx; i < this->i_endidx; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = + i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 3 stencil points, + // expect 3. + EXPECT_NEAR(this->p_cell_out[idx], static_cast<TypeParam>(3), + static_cast<TypeParam>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +// +// ! 
cell_avg +// +//////////////////////////////////////////////////////////////////////////////// + +TYPED_TEST(InterpolationScalarTypedTestFixture, AvgLib) { + + // Call the function + cell_avg_lib<TypeParam>(this->psi_c.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), + this->avg_coeff.data(), this->avg_psi_c.data(), + this->i_startblk, this->i_endblk, this->i_startidx, + this->i_endidx, this->slev, this->elev, this->nproma, + this->nlev, this->nblks_c, this->lacc); + + // Check the outputs for blocks [i_startblk, i_endblk], levels [slev, elev) + // and indices [i_startidx, i_endidx). NOTE(review): upper bounds excluded? + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev; level < this->elev; ++level) { + for (int i = this->i_startidx; i < this->i_endidx; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = + i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 4 stencil points, + // expect 4. 
+ EXPECT_NEAR(this->avg_psi_c[idx], static_cast<TypeParam>(4), + static_cast<TypeParam>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; + } + } + } +} + +template <typename TypePair> +class InterpolationScalarMixedTestFixture : public ::testing::Test, + public interp_dimensions { +public: + using InType = typename TypePair::in_type; + using OutType = typename TypePair::out_type; + + // Arrays used for cells2edges + std::vector<InType> p_cell_in; // Dimensions: (nproma, nlev, nblks_c) + std::vector<int> edge_cell_idx; // Dimensions: (nproma, nblks_e, 2) + std::vector<int> edge_cell_blk; // Dimensions: (nproma, nblks_e, 2) + std::vector<OutType> coeff_int_edges; // Dimensions: (nproma, 2, nblks_e) + std::vector<OutType> p_edge_out; // Dimensions: (nproma, nlev, nblks_e) + + // Further parameters for cells2edges + const int patch_id = 0; + const bool l_limited_area = false; + const bool lfill_latbc = false; + std::vector<int> i_startblk_in; // Dimensions: (2) + std::vector<int> i_endblk_in; // Dimensions: (2) + std::vector<int> i_startidx_in; // Dimensions: (2) + std::vector<int> i_endidx_in; // Dimensions: (2) + + // Arrays used for cells2verts + std::vector<int> vert_cell_idx; // Dimensions: (nproma, nblks_v, 6) + std::vector<int> vert_cell_blk; // Dimensions: (nproma, nblks_v, 6) + std::vector<OutType> coeff_int_verts; // Dimensions: (nproma, 6, nblks_v) + std::vector<OutType> p_vert_out; // Dimensions: (nproma, nlev, nblks_v) + + InterpolationScalarMixedTestFixture() { + // Allocate and initialize arrays needed for cells2edges + p_cell_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_c), + static_cast<InType>(1)); + edge_cell_idx.resize(num_elements_3d<int>(nproma, nblks_e, 2), 1); + edge_cell_blk.resize(num_elements_3d<int>(nproma, nblks_e, 2), 0); + coeff_int_edges.resize(num_elements_3d<OutType>(nproma, 2, nblks_e), + static_cast<OutType>(1)); + + p_edge_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_e), + 
static_cast<OutType>(0)); + + // Loop-bound arrays for cells2edges: 2 entries each, set to the shared bounds + i_startblk_in.resize(2, i_startblk); + i_endblk_in.resize(2, i_endblk); + i_startidx_in.resize(2, i_startidx); + i_endidx_in.resize(2, i_endidx); + + // Allocate & Initialize arrays needed for cells2verts + vert_cell_idx.resize(num_elements_3d<int>(nproma, nblks_v, 6), 1); + vert_cell_blk.resize(num_elements_3d<int>(nproma, nblks_v, 6), 0); + coeff_int_verts.resize(num_elements_3d<OutType>(nproma, 6, nblks_v), + static_cast<OutType>(1)); + + p_vert_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), + static_cast<OutType>(0)); + } +}; + +TYPED_TEST_SUITE(InterpolationScalarMixedTestFixture, MixedTypesSP2DP); + +//////////////////////////////////////////////////////////////////////////////// +// +// ! cells2edges +// +//////////////////////////////////////////////////////////////////////////////// + +TYPED_TEST(InterpolationScalarMixedTestFixture, cells2edges) { + using InType = typename TestFixture::InType; + using OutType = typename TestFixture::OutType; + + // Call the function + cells2edges_scalar_lib<InType, OutType>( + this->p_cell_in.data(), this->edge_cell_idx.data(), + this->edge_cell_blk.data(), this->coeff_int_edges.data(), + this->p_edge_out.data(), this->i_startblk_in.data(), + this->i_endblk_in.data(), this->i_startidx_in.data(), + this->i_endidx_in.data(), this->slev, this->elev, this->nproma, + this->nlev, this->nblks_c, this->nblks_e, this->patch_id, + this->l_limited_area, this->lfill_latbc, this->lacc); + + // Check the outputs for blocks [i_startblk, i_endblk], levels [slev, elev) + // and indices [i_startidx, i_endidx). NOTE(review): upper bounds excluded? + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev; level < this->elev; ++level) { + for (int i = this->i_startidx; i < this->i_endidx; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = + i + level * this->nproma + block * this->nproma * 
this->nlev; + // Since every contribution is 1 and there are 2 stencil points, + // expect 2. + EXPECT_NEAR(this->p_edge_out[idx], static_cast<OutType>(2), + static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +// +// ! cells2verts +// +//////////////////////////////////////////////////////////////////////////////// + +TYPED_TEST(InterpolationScalarMixedTestFixture, cells2verts) { + using InType = typename TestFixture::InType; + using OutType = typename TestFixture::OutType; + + cells2verts_scalar_lib<InType, OutType>( + this->p_cell_in.data(), this->vert_cell_idx.data(), + this->vert_cell_blk.data(), this->coeff_int_verts.data(), + this->p_vert_out.data(), this->i_startblk, this->i_endblk, + this->i_startidx, this->i_endidx, this->slev, this->elev, this->nproma, + this->nlev, this->nblks_c, this->nblks_v, this->lacc, this->acc_async); + + // Check the outputs for blocks [i_startblk, i_endblk], levels [slev, elev) + // and indices [i_startidx, i_endidx). NOTE(review): upper bounds excluded? + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev; level < this->elev; ++level) { + for (int i = this->i_startidx; i < this->i_endidx; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = + i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 6 stencil points, + // expect 6. + EXPECT_NEAR(this->p_vert_out[idx], static_cast<OutType>(6), + static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +// +// ! 
cells2verts ri +// +//////////////////////////////////////////////////////////////////////////////// + +// The test for cells2verts_ri is similar to cells2verts, but is done here +// separately, as a different template instantiation is needed for the +// function call +template <typename Types> +class Cells2vertsriScalarLibTestFixture : public testing::Test, + public interp_dimensions { +public: + using InType = typename Types::in_type; + using OutType = typename Types::out_type; + + // Arrays stored in std::vector. + std::vector<InType> p_cell_in; // Dimensions: (nproma, nlev, nblks_c) + std::vector<int> vert_cell_idx; // Dimensions: (nproma, nblks_v, 6) + std::vector<int> vert_cell_blk; // Dimensions: (nproma, nblks_v, 6) + std::vector<InType> coeff_int; // Dimensions: (nproma, 6, nblks_v) + std::vector<OutType> p_vert_out; // Dimensions: (nproma, nlev, nblks_v) + + Cells2vertsriScalarLibTestFixture() { + // Allocate and initialize inputs. + p_cell_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_c), + static_cast<InType>(1)); + vert_cell_idx.resize(num_elements_3d<int>(nproma, nblks_v, 6), 1); + vert_cell_blk.resize(num_elements_3d<int>(nproma, nblks_v, 6), 0); + coeff_int.resize(num_elements_3d<InType>(nproma, 6, nblks_v), + static_cast<InType>(1)); + + // Allocate output arrays and initialize to zero. 
+ p_vert_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), + static_cast<OutType>(0)); + } +}; + +// Add test suite +TYPED_TEST_SUITE(Cells2vertsriScalarLibTestFixture, MixedTypes); + +// Add test +TYPED_TEST(Cells2vertsriScalarLibTestFixture, cells2verts_ri) { + using InType = typename TestFixture::InType; + using OutType = typename TestFixture::OutType; + + // Call the function + cells2verts_scalar_ri_lib<InType, OutType>( + this->p_cell_in.data(), this->vert_cell_idx.data(), + this->vert_cell_blk.data(), this->coeff_int.data(), + this->p_vert_out.data(), this->i_startblk, this->i_endblk, + this->i_startidx, this->i_endidx, this->slev, this->elev, this->nproma, + this->nlev, this->nblks_c, this->nblks_v, this->lacc, this->acc_async); + + // Check the outputs only for blocks in the range + // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev; level < this->elev; ++level) { + for (int i = this->i_startidx; i < this->i_endidx; ++i) { + // Compute the linear index for a 3D array in column-major order: +#ifdef __LOOP_EXCHANGE + size_t idx = + i + level * this->nproma + block * this->nproma * this->nlev; +#else + size_t idx = level + i * this->nlev + block * this->nproma * this->nlev; +#endif + // Since every contribution is 1 and there are 6 stencil points, + // expect 6. 
+ EXPECT_NEAR(this->p_vert_out[idx], static_cast<OutType>(6), + static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; + } + } + } +} diff --git a/test/c/test_interpolation_vector.cpp b/test/c/test_interpolation_vector.cpp index 0eb5a8d..680fb6e 100644 --- a/test/c/test_interpolation_vector.cpp +++ b/test/c/test_interpolation_vector.cpp @@ -9,29 +9,31 @@ // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- -#include <gtest/gtest.h> #include <Kokkos_Core.hpp> +#include <gtest/gtest.h> #include <vector> + #include "mo_lib_interpolation_vector.hpp" // Dimensions for the test (small, trivial test). -// We assume Fortran ordering: column-major, but our C wrappers will wrap raw pointers into Kokkos::Views with LayoutLeft. +// We assume Fortran ordering: column-major, but our C wrappers will wrap raw +// pointers into Kokkos::Views with LayoutLeft. constexpr int nproma = 2; -constexpr int nlev = 3; -constexpr int nblks_e = 2; // For the edge arrays (p_vn_in, p_vt_in) -constexpr int nblks_c = 2; // For the cell arrays and interpolation coefficients +constexpr int nlev = 3; +constexpr int nblks_e = 2; // For the edge arrays (p_vn_in, p_vt_in) +constexpr int nblks_c = 2; // For the cell arrays and interpolation coefficients // For the get_indices_c_lib inputs. -constexpr int i_startblk = 0; -constexpr int i_endblk = 1; // two blocks: indices 0 and 1 +constexpr int i_startblk = 0; +constexpr int i_endblk = 1; // two blocks: indices 0 and 1 constexpr int i_startidx_in = 0; -constexpr int i_endidx_in = nproma - 1; // 0 and 1 -constexpr int slev = 0; -constexpr int elev = nlev - 1; // 0 .. 2 +constexpr int i_endidx_in = nproma - 1; // 0 and 1 +constexpr int slev = 0; +constexpr int elev = nlev - 1; // 0 .. 2 -// Helper to compute total number of elements for a 3D array stored in column-major order. 
-template<typename T> -size_t num_elements(int dim1, int dim2, int dim3) { +// Helper to compute total number of elements for a 3D array stored in +// column-major order. +template <typename T> size_t num_elements(int dim1, int dim2, int dim3) { return static_cast<size_t>(dim1) * dim2 * dim3; } @@ -46,12 +48,13 @@ TEST(Edges2CellsTest, DPTest) { // Here we set cell_edge_idx to 1, 2, 1 for every triple. for (int i = 0; i < num_elements<int>(nproma, nblks_c, 3); i += 3) { - cell_edge_idx[i] = 1; - cell_edge_idx[i+1] = 2; - cell_edge_idx[i+2] = 1; + cell_edge_idx[i] = 1; + cell_edge_idx[i + 1] = 2; + cell_edge_idx[i + 2] = 1; } - // Similarly, set cell_edge_blk to all ones (valid since nblks_e=2, so index 1 means block 0 after subtracting 1). - // e_bln_c_u and e_bln_c_v: dimensions [nproma, 6, nblks_c] + // Similarly, set cell_edge_blk to all ones (valid since nblks_e=2, so index 1 + // means block 0 after subtracting 1). e_bln_c_u and e_bln_c_v: dimensions + // [nproma, 6, nblks_c] std::vector<double> e_bln_c_u(num_elements<double>(nproma, 6, nblks_c), 1.0); std::vector<double> e_bln_c_v(num_elements<double>(nproma, 6, nblks_c), 1.0); // Output arrays: dimensions [nproma, nlev, nblks_c] @@ -62,16 +65,11 @@ TEST(Edges2CellsTest, DPTest) { std::vector<double> p_v_ref(num_elements<double>(nproma, nlev, nblks_c), 6.0); // Call the dp (double precision) version. 
- edges2cells_vector_lib_dp( - p_vn_in.data(), p_vt_in.data(), - cell_edge_idx.data(), cell_edge_blk.data(), - e_bln_c_u.data(), e_bln_c_v.data(), - p_u_out.data(), p_v_out.data(), - i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, - nproma, - nlev, nblks_e, nblks_c); + edges2cells_vector_lib<double>( + p_vn_in.data(), p_vt_in.data(), cell_edge_idx.data(), + cell_edge_blk.data(), e_bln_c_u.data(), e_bln_c_v.data(), p_u_out.data(), + p_v_out.data(), i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, + elev, nproma, nlev, nblks_e, nblks_c); // Check that for each computed cell in p_u_out and p_v_out, the value is 6. // This is because for each cell, the kernel adds 6 terms of 1*1. @@ -90,9 +88,9 @@ TEST(Edges2CellsTest, SPTest) { std::vector<int> cell_edge_blk(num_elements<int>(nproma, nblks_c, 3), 1); // Set cell_edge_idx values to 1, 2, 1. for (int i = 0; i < num_elements<int>(nproma, nblks_c, 3); i += 3) { - cell_edge_idx[i] = 1; - cell_edge_idx[i+1] = 2; - cell_edge_idx[i+2] = 1; + cell_edge_idx[i] = 1; + cell_edge_idx[i + 1] = 2; + cell_edge_idx[i + 2] = 1; } std::vector<float> e_bln_c_u(num_elements<float>(nproma, 6, nblks_c), 1.0f); std::vector<float> e_bln_c_v(num_elements<float>(nproma, 6, nblks_c), 1.0f); @@ -103,16 +101,11 @@ TEST(Edges2CellsTest, SPTest) { std::vector<float> p_v_ref(num_elements<float>(nproma, nlev, nblks_c), 6.0f); // Call the sp (float precision) version. 
- edges2cells_vector_lib_sp( - p_vn_in.data(), p_vt_in.data(), - cell_edge_idx.data(), cell_edge_blk.data(), - e_bln_c_u.data(), e_bln_c_v.data(), - p_u_out.data(), p_v_out.data(), - i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, - nproma, - nlev, nblks_e, nblks_c); + edges2cells_vector_lib<float>( + p_vn_in.data(), p_vt_in.data(), cell_edge_idx.data(), + cell_edge_blk.data(), e_bln_c_u.data(), e_bln_c_v.data(), p_u_out.data(), + p_v_out.data(), i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, + elev, nproma, nlev, nblks_e, nblks_c); // Verify that every computed output equals 6. for (size_t idx = 0; idx < p_u_out.size(); ++idx) { -- GitLab From 609750db9cb3df01ba2bed31fadcb63daa8fe60c Mon Sep 17 00:00:00 2001 From: Ali Sedighi <sedighi@dkrz.de> Date: Thu, 6 Mar 2025 18:01:13 +0000 Subject: [PATCH 39/76] Added C++ version of the routines of mo_lib_intp_rbf (icon-libraries/libiconmath!35) ## What is the new feature The routines in mo_lib_intp_rbf are ported to C++ ## How is it implemented Kokkos is used to manage the memory and the for loops Co-authored-by: Pradipta Samanta <samanta@dkrz.de> Merged-by: Pradipta Samanta <samanta@dkrz.de> Changelog: feature --- src/interpolation/CMakeLists.txt | 6 +- src/interpolation/interpolation_bindings.cpp | 130 +++++ src/interpolation/interpolation_bindings.h | 65 +++ ...b_intp_rbf-rbf_vec_interpol_vertex_lib.cpp | 197 -------- ...b_intp_rbf-rbf_vec_interpol_vertex_lib.hpp | 32 -- ...f-rbf_vec_interpol_vertex_lib_bindings.cpp | 134 ----- ...rbf-rbf_vec_interpol_vertex_lib_bindings.h | 54 -- src/interpolation/mo_lib_intp_rbf.cpp | 475 ++++++++++++++++++ src/interpolation/mo_lib_intp_rbf.hpp | 50 ++ test/c/test_interpolation_scalar.cpp | 9 +- test/c/test_intp_rbf.cpp | 310 +++++++++--- 11 files changed, 970 insertions(+), 492 deletions(-) delete mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp delete mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp 
delete mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp delete mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h create mode 100644 src/interpolation/mo_lib_intp_rbf.cpp create mode 100644 src/interpolation/mo_lib_intp_rbf.hpp diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index 1051516..96f281c 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -16,10 +16,8 @@ add_library( mo_lib_interpolation_vector.F90 mo_lib_interpolation_vector.cpp mo_lib_intp_rbf.F90 - mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp - mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp - interpolation_bindings.cpp -) + mo_lib_intp_rbf.cpp + interpolation_bindings.cpp) add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation) diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp index 628f411..4524ad7 100644 --- a/src/interpolation/interpolation_bindings.cpp +++ b/src/interpolation/interpolation_bindings.cpp @@ -12,6 +12,7 @@ #include "interpolation_bindings.h" #include "mo_lib_interpolation_scalar.hpp" #include "mo_lib_interpolation_vector.hpp" +#include "mo_lib_intp_rbf.hpp" // This is the binding for mo_interpolation_vector::edges2cells_vector_lib // (wp=dp) @@ -326,3 +327,132 @@ void cell_avg_lib_sp(const float *psi_c, const int *cell_neighbor_idx, avg_psi_c, i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, nlev, nblks_c, lacc); } + +// This is the binding for mo_intp_rbf::rbf_vec_interpol_vertex_dp_lib +void rbf_vec_interpol_vertex_lib_dp( + const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const double *rbf_vec_coeff_v, double *p_u_out, double *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool 
acc_async, const int nlev, const int nblks_e, + const int nblks_v) { + rbf_vec_interpol_vertex_lib<double, double>( + p_e_in, rbf_vec_idx_v, rbf_vec_blk_v, rbf_vec_coeff_v, p_u_out, p_v_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + lacc, acc_async, nlev, nblks_e, nblks_v); +} + +// This is the binding for mo_intp_rbf::rbf_vec_interpol_vertex_sp_lib +void rbf_vec_interpol_vertex_lib_sp( + const float *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const float *rbf_vec_coeff_v, float *p_u_out, float *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v) { + rbf_vec_interpol_vertex_lib<float, float>( + p_e_in, rbf_vec_idx_v, rbf_vec_blk_v, rbf_vec_coeff_v, p_u_out, p_v_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + lacc, acc_async, nlev, nblks_e, nblks_v); +} + +// This is the binding for mo_intp_rbf::rbf_vec_interpol_vertex_dpsp_lib +void rbf_vec_interpol_vertex_lib_dpsp( + const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const double *rbf_vec_coeff_v, float *p_u_out, float *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v) { + rbf_vec_interpol_vertex_lib<double, float>( + p_e_in, rbf_vec_idx_v, rbf_vec_blk_v, rbf_vec_coeff_v, p_u_out, p_v_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + lacc, acc_async, nlev, nblks_e, nblks_v); +} + +// This is the binding for mo_intp_rbf::rbf_interpol_c2grad_lib (wp=sp) +void rbf_interpol_c2grad_lib_sp(const float *p_cell_in, + const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, + const float 
*rbf_c2grad_coeff, float *grad_x, + float *grad_y, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int rbf_c2grad_dim, + int nlev, int nblk_c, bool lacc) { + + rbf_interpol_c2grad_lib<float>( + p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, rbf_c2grad_coeff, grad_x, + grad_y, i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, + nproma, rbf_c2grad_dim, nlev, nblk_c, lacc); +} + +// This is the binding for mo_intp_rbf::rbf_interpol_c2grad_lib (wp=dp) +void rbf_interpol_c2grad_lib_dp(const double *p_cell_in, + const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, + const double *rbf_c2grad_coeff, double *grad_x, + double *grad_y, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int rbf_c2grad_dim, + int nlev, int nblk_c, bool lacc) { + + rbf_interpol_c2grad_lib<double>( + p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, rbf_c2grad_coeff, grad_x, + grad_y, i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, + nproma, rbf_c2grad_dim, nlev, nblk_c, lacc); +} + +// This is the binding for mo_intp_rbf::rbf_vec_interpol_cell_lib (wp=sp) +void rbf_vec_interpol_cell_lib_sp( + const float *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c, + const float *rbf_vec_coeff_c, float *p_u_out, float *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, + bool lacc, bool acc_async) { + + rbf_vec_interpol_cell_lib<float>( + p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c, p_u_out, p_v_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async); +} + +// This is the binding for mo_intp_rbf::rbf_vec_interpol_cell_lib (wp=dp) +void rbf_vec_interpol_cell_lib_dp( + const double *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c, + const double 
*rbf_vec_coeff_c, double *p_u_out, double *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, + bool lacc, bool acc_async) { + + rbf_vec_interpol_cell_lib<double>( + p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c, p_u_out, p_v_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async); +} + +// This is the binding for mo_intp_rbf::rbf_vec_interpol_edge_lib (wp=dp) +void rbf_vec_interpol_edge_lib_dp( + const double *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e, + const double *rbf_vec_coeff_e, double *p_vt_out, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, + int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async) { + + rbf_vec_interpol_edge_lib<double>( + p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e, rbf_vec_coeff_e, p_vt_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nlev, + nproma, rbf_vec_dim_e, nblks_e, lacc, acc_async); +} + +// This is the binding for mo_intp_rbf::rbf_vec_interpol_edge_lib (wp=sp) +void rbf_vec_interpol_edge_lib_sp( + const float *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e, + const float *rbf_vec_coeff_e, float *p_vt_out, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nlev, + int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, bool acc_async) { + + rbf_vec_interpol_edge_lib<float>( + p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e, rbf_vec_coeff_e, p_vt_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nlev, + nproma, rbf_vec_dim_e, nblks_e, lacc, acc_async); +} diff --git a/src/interpolation/interpolation_bindings.h b/src/interpolation/interpolation_bindings.h index 7cb873d..64c6a8c 100644 --- a/src/interpolation/interpolation_bindings.h +++ 
b/src/interpolation/interpolation_bindings.h @@ -185,4 +185,69 @@ void cell_avg_lib_sp(const float *psi_c, const int *cell_neighbor_idx, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const bool lacc); + +void rbf_vec_interpol_vertex_lib_dp( + const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const double *rbf_vec_coeff_v, double *p_u_out, double *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v); + +void rbf_vec_interpol_vertex_lib_sp( + const float *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const float *rbf_vec_coeff_v, float *p_u_out, float *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v); + +void rbf_vec_interpol_vertex_lib_dpsp( + const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const double *rbf_vec_coeff_v, float *p_u_out, float *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v); + +void rbf_interpol_c2grad_lib_sp( + const float *p_cell_in, const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, const float *rbf_c2grad_coeff, float *grad_x, + const float *grad_y, int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim, + int nlev, int nblk_c, bool lacc); + +void rbf_interpol_c2grad_lib_dp( + const double *p_cell_in, const int *rbf_c2grad_idx, + const 
int *rbf_c2grad_blk, const double *rbf_c2grad_coeff, double *grad_x, + const double *grad_y, int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim, + int nlev, int nblk_c, bool lacc); + +void rbf_vec_interpol_cell_lib_sp( + const float *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c, + const float *rbf_vec_coeff_c, float *p_u_out, float *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, + bool lacc, bool acc_async); + +void rbf_vec_interpol_cell_lib_dp( + const double *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c, + const double *rbf_vec_coeff_c, double *p_u_out, double *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, + bool lacc, bool acc_async); + +void rbf_vec_interpol_edge_lib_dp( + const double *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e, + const double *rbf_vec_coeff_e, double *p_vt_out, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, + int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async); + +void rbf_vec_interpol_edge_lib_sp( + const float *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e, + const float *rbf_vec_coeff_e, float *p_vt_out, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nlev, + int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, bool acc_async); } diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp deleted file mode 100644 index c9b776e..0000000 --- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp +++ /dev/null @@ -1,197 +0,0 @@ -// ICON -// -// 
--------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -/// Contains the only mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib() -/// -/// Separate to avoid conflicts with Ali working on rest of mo_lib_intp_rbf - -#include <type_traits> -#include <Kokkos_Core.hpp> -#include "mo_lib_loopindices.hpp" -#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" - - -constexpr int rbf_vec_dim_v = 6; - -//------------------------------------------------------------------------- -// -// -//> -/// Performs vector RBF reconstruction at triangle vertices. -/// -/// Theory described in Narcowich and Ward (Math Comp. 1994) and -/// Bonaventura and Baudisch (Mox Report n. 75). -/// It takes edge based variables as input and combines them -/// into three dimensional cartesian vectors at each vertex. -/// -/// Two templated variables in order to support mixed precision. 
-/// Intended that type_traits::is_floating_point(T,S)==TRUE -/// precision(T) >= precision(S) -template <typename T, typename S> -void rbf_vec_interpol_vertex_lib( - const T* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const T* rbf_vec_coeff_v, - S* p_u_out, - S* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - // Dimensions for the arrays. - const int nlev, const int nblks_e, const int nblks_v - ) -{ - /* -#ifdef DIM_ENABLE_GPU - if (lacc){ using MemSpace = Kokkos::CudaSpace; - } else { using MemSpace = Kokkos::HostSpace; } -#else - using MemSpace = Kokkos::HostSpace; -#endif - - */ - - // Wrap raw pointers in unmanaged Kokkos Views. 
- typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<const T****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D; - - - - // input components of velocity or horizontal vorticity vectors at edge midpoints - // dim: (nproma,nlev,nblks_e) - UnmanagedConstT3D p_e_in_view(p_e_in, nproma, nlev, nblks_e); - - // index array defining the stencil of surrounding edges for vector rbf interpolation at each triangle vertex - // (rbf_vec_dim_v,nproma,nblks_v) - UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v); - UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v); - - // coefficients are working precision array containing the coefficients used for vector rbf interpolation - // at each tringle vertex (input is normal component), - // dim: (rbf_vec_dim_v,2,nproma,nblks_v) - UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v); - - // reconstructed x-component (u) of velocity vector, - // dim: (nproma,nlev,nblks_v) - UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v); - // reconstructed y-component (v) of velocity vector, - // dim: (nproma,nlev,nblks_v) - UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v); - - // Local vars - //int jv, jk, jb; // integer over vertices, levels, and blocks, - int jb; // integer over vertices, levels, and blocks, - int i_startidx; // start index - int i_endidx; // end index - - for (jb=i_startblk; jb <= i_endblk; ++jb){ - - get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, - i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); - - 
Kokkos::parallel_for("rbf_vec_interpol_vertex_lib", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jv) { - - // NOTE: Static indexes reduced by 1 from Fortran version - p_u_out_view(jv, jk, jb) = - ptr_coeff_view(0, 0, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + - ptr_coeff_view(1, 0, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + - ptr_coeff_view(2, 0, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + - ptr_coeff_view(3, 0, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + - ptr_coeff_view(4, 0, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + - ptr_coeff_view(5, 0, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); - p_v_out_view(jv, jk, jb) = - ptr_coeff_view(0, 1, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + - ptr_coeff_view(1, 1, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + - ptr_coeff_view(2, 1, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + - ptr_coeff_view(3, 1, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + - ptr_coeff_view(4, 1, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + - ptr_coeff_view(5, 1, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); - } - ); - } -} - -// Explicit instantiation - double precision -template -void rbf_vec_interpol_vertex_lib<double, double>( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - double* p_u_out, - double* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // 
inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); - -// Explicit instantiation - single precision -template -void rbf_vec_interpol_vertex_lib<float, float>( - const float* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const float* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); - -// Explicit instantiation - mixed precision -template -void rbf_vec_interpol_vertex_lib<double, float>( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); - diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp 
b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp deleted file mode 100644 index c0b6f05..0000000 --- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp +++ /dev/null @@ -1,32 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -#pragma once - -template <typename T, typename S> -void rbf_vec_interpol_vertex_lib( - const T* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const T* rbf_vec_coeff_v, - S* p_u_out, - S* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_c - ); \ No newline at end of file diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp deleted file mode 100644 index 06dc467..0000000 --- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp +++ /dev/null @@ -1,134 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See 
LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h" -#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" - -void rbf_vec_interpol_vertex_lib_dp( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - double* p_u_out, - double* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ) -{ - rbf_vec_interpol_vertex_lib<double, double>( - p_e_in, - rbf_vec_idx_v, - rbf_vec_blk_v, - rbf_vec_coeff_v, - p_u_out, - p_v_out, - i_startblk, // start_block needed for get_indices_c_lib - i_endblk, // end_block needed for get_indices_c_lib - i_startidx_in, // start_index needed for get_indices_c_lib - i_endidx_in, // end_index needed for get_indices_c_lib - slev, // vertical start level - elev, // vertical end level - nproma, // inner loop length/vector length - lacc, // if true, use Cuda mem-/exec-spaces - acc_async, // [deprecated] use async acc - nlev, nblks_e, nblks_v - ); -} - - -void rbf_vec_interpol_vertex_lib_sp( - const float* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const float* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index 
needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ) -{ - rbf_vec_interpol_vertex_lib<float, float>( - p_e_in, - rbf_vec_idx_v, - rbf_vec_blk_v, - rbf_vec_coeff_v, - p_u_out, - p_v_out, - i_startblk, // start_block needed for get_indices_c_lib - i_endblk, // end_block needed for get_indices_c_lib - i_startidx_in, // start_index needed for get_indices_c_lib - i_endidx_in, // end_index needed for get_indices_c_lib - slev, // vertical start level - elev, // vertical end level - nproma, // inner loop length/vector length - lacc, // if true, use Cuda mem-/exec-spaces - acc_async, // [deprecated] use async acc - nlev, nblks_e, nblks_v - ); - -} - -void rbf_vec_interpol_vertex_lib_mixprec( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ) -{ - rbf_vec_interpol_vertex_lib<double, float>( - p_e_in, - rbf_vec_idx_v, - rbf_vec_blk_v, - rbf_vec_coeff_v, - p_u_out, - p_v_out, - i_startblk, // start_block needed for get_indices_c_lib - i_endblk, // end_block needed for 
get_indices_c_lib - i_startidx_in, // start_index needed for get_indices_c_lib - i_endidx_in, // end_index needed for get_indices_c_lib - slev, // vertical start level - elev, // vertical end level - nproma, // inner loop length/vector length - lacc, // if true, use Cuda mem-/exec-spaces - acc_async, // [deprecated] use async acc - nlev, nblks_e, nblks_v - ); - -} - diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h deleted file mode 100644 index 4356f88..0000000 --- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h +++ /dev/null @@ -1,54 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -#pragma once - -extern "C" { - -void rbf_vec_interpol_vertex_lib_dp( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - double* p_u_out, - double* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); - -void rbf_vec_interpol_vertex_lib_sp( - const float* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - 
const float* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); - -} \ No newline at end of file diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp new file mode 100644 index 0000000..d1178a6 --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf.cpp @@ -0,0 +1,475 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include "mo_lib_intp_rbf.hpp" +#include <Kokkos_Core.hpp> + +constexpr int rbf_vec_dim_v = 6; + +//------------------------------------------------------------------------- +// +// +//> +/// Performs vector RBF reconstruction at triangle vertices. +/// +/// Theory described in Narcowich and Ward (Math Comp. 1994) and +/// Bonaventura and Baudisch (Mox Report n. 75). +/// It takes edge based variables as input and combines them +/// into three dimensional cartesian vectors at each vertex. +/// +/// Two templated variables in order to support mixed precision. 
+/// Intended that type_traits::is_floating_point(T,S)==TRUE +/// precision(T) >= precision(S) +template <typename T, typename S> +void rbf_vec_interpol_vertex_lib( + const T *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const T *rbf_vec_coeff_v, S *p_u_out, S *p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + // Dimensions for the arrays. + const int nlev, const int nblks_e, const int nblks_v) { + /* +#ifdef DIM_ENABLE_GPU + if (lacc){ using MemSpace = Kokkos::CudaSpace; + } else { using MemSpace = Kokkos::HostSpace; } +#else + using MemSpace = Kokkos::HostSpace; +#endif + + */ + + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + typedef Kokkos::View<S ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedS3D; + + // input components of velocity or horizontal vorticity vectors at edge + // midpoints dim: (nproma,nlev,nblks_e) + UnmanagedConstT3D p_e_in_view(p_e_in, nproma, nlev, nblks_e); + + // index array defining the stencil of surrounding edges for vector rbf + // interpolation at each triangle vertex (rbf_vec_dim_v,nproma,nblks_v) + UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v); + UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v); + + // coefficients are working precision array containing the coefficients used + // for vector rbf interpolation at each tringle vertex (input is normal + // component), dim: (rbf_vec_dim_v,2,nproma,nblks_v) + UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, + nblks_v); + + // reconstructed x-component (u) of velocity vector, + // dim: (nproma,nlev,nblks_v) + UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v); + // reconstructed y-component (v) of velocity vector, + // dim: (nproma,nlev,nblks_v) + UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v); + + // Local vars + // int jv, jk, jb; // integer over vertices, levels, and blocks, + int jb; // integer over vertices, levels, and blocks, + int i_startidx; // start index + int i_endidx; // end index + + for (jb = i_startblk; jb <= i_endblk; ++jb) { + + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + 
"rbf_vec_interpol_vertex_lib", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jv) { + // NOTE: Static indexes reduced by 1 from Fortran version + p_u_out_view(jv, jk, jb) = + ptr_coeff_view(0, 0, jv, jb) * + p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + + ptr_coeff_view(1, 0, jv, jb) * + p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + + ptr_coeff_view(2, 0, jv, jb) * + p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + + ptr_coeff_view(3, 0, jv, jb) * + p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + + ptr_coeff_view(4, 0, jv, jb) * + p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + + ptr_coeff_view(5, 0, jv, jb) * + p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); + p_v_out_view(jv, jk, jb) = + ptr_coeff_view(0, 1, jv, jb) * + p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + + ptr_coeff_view(1, 1, jv, jb) * + p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + + ptr_coeff_view(2, 1, jv, jb) * + p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + + ptr_coeff_view(3, 1, jv, jb) * + p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + + ptr_coeff_view(4, 1, jv, jb) * + p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + + ptr_coeff_view(5, 1, jv, jb) * + p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); + }); + } +} + +template <typename T> +void rbf_interpol_c2grad_lib(const T *p_cell_in, const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, + const T *rbf_c2grad_coeff, T *grad_x, T *grad_y, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + int rbf_c2grad_dim, int nlev, int nblks_c, + bool lacc) { + + // aliases for unmanaged Kokkos views + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, 
Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + + // to avoid memory ownership issues + UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c); + UnmanagedT3D grad_x_view(grad_x, nproma, nlev, nblks_c); + UnmanagedT3D grad_y_view(grad_y, nproma, nlev, nblks_c); + UnmanagedConstInt3D rbf_c2grad_idx_view(rbf_c2grad_idx, rbf_c2grad_dim, + nproma, nblks_c); + UnmanagedConstInt3D rbf_c2grad_blk_view(rbf_c2grad_blk, rbf_c2grad_dim, + nproma, nblks_c); + UnmanagedConstT4D rbf_c2grad_coeff_view(rbf_c2grad_coeff, rbf_c2grad_dim, 2, + nproma, nblks_c); + + for (int jb = i_startblk; jb <= i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "rbf_interpol_c2grad", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + grad_x_view(jc, jk, jb) = + rbf_c2grad_coeff_view(0, 1, jc, jb) * p_cell_in_view(jc, jk, jb) + + rbf_c2grad_coeff_view(1, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(1, jc, jb), jk, + rbf_c2grad_blk_view(1, jc, jb)) + + rbf_c2grad_coeff_view(2, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(2, jc, jb), jk, + rbf_c2grad_blk_view(2, jc, jb)) + + rbf_c2grad_coeff_view(3, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(3, jc, jb), jk, + rbf_c2grad_blk_view(3, jc, jb)) + + rbf_c2grad_coeff_view(4, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(4, jc, jb), jk, + rbf_c2grad_blk_view(4, jc, jb)) + + rbf_c2grad_coeff_view(5, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(5, jc, jb), jk, + rbf_c2grad_blk_view(5, jc, jb)) + + rbf_c2grad_coeff_view(6, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(6, jc, jb), jk, + rbf_c2grad_blk_view(6, jc, jb)) + + rbf_c2grad_coeff_view(7, 1, 
jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(7, jc, jb), jk, + rbf_c2grad_blk_view(7, jc, jb)) + + rbf_c2grad_coeff_view(8, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk, + rbf_c2grad_blk_view(8, jc, jb)) + + rbf_c2grad_coeff_view(9, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk, + rbf_c2grad_blk_view(9, jc, jb)); + + grad_y_view(jc, jk, jb) = + rbf_c2grad_coeff_view(0, 2, jc, jb) * p_cell_in_view(jc, jk, jb) + + rbf_c2grad_coeff_view(1, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(1, jc, jb), jk, + rbf_c2grad_blk_view(1, jc, jb)) + + rbf_c2grad_coeff_view(2, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(2, jc, jb), jk, + rbf_c2grad_blk_view(2, jc, jb)) + + rbf_c2grad_coeff_view(3, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(3, jc, jb), jk, + rbf_c2grad_blk_view(3, jc, jb)) + + rbf_c2grad_coeff_view(4, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(4, jc, jb), jk, + rbf_c2grad_blk_view(4, jc, jb)) + + rbf_c2grad_coeff_view(5, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(5, jc, jb), jk, + rbf_c2grad_blk_view(5, jc, jb)) + + rbf_c2grad_coeff_view(6, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(6, jc, jb), jk, + rbf_c2grad_blk_view(6, jc, jb)) + + rbf_c2grad_coeff_view(7, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(7, jc, jb), jk, + rbf_c2grad_blk_view(7, jc, jb)) + + rbf_c2grad_coeff_view(8, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk, + rbf_c2grad_blk_view(8, jc, jb)) + + rbf_c2grad_coeff_view(9, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk, + rbf_c2grad_blk_view(9, jc, jb)); + }); + + } // for +} // void + +//------------------------------------------rbf_vec_interpol_cell_lib--------------------------------------------- + +template <typename T> +void rbf_vec_interpol_cell_lib(const T *p_vn_in, const int *rbf_vec_idx_c, + const int *rbf_vec_blk_c, + const T *rbf_vec_coeff_c, T *p_u_out, T *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, + 
int i_endidx_in, int slev, int elev, int nproma, + int nlev, int nblks_c, int nblks_e, + int rbf_vec_dim_c, bool lacc, bool acc_async) { + + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + + UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e); + UnmanagedConstInt3D rbf_vec_idx_c_view(rbf_vec_idx_c, rbf_vec_dim_c, nproma, + nblks_c); + UnmanagedConstInt3D rbf_vec_blk_c_view(rbf_vec_blk_c, rbf_vec_dim_c, nproma, + nblks_c); + UnmanagedConstT4D rbf_vec_coeff_c_view(rbf_vec_coeff_c, nproma, + nblks_c); // TODO + UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblks_c); + UnmanagedT3D p_v_out_view(p_u_out, nproma, nlev, nblks_c); + + for (int jb = i_startblk; jb <= i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "rbf_vec_interpol_cell_lib", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + p_u_out_view(jc, jk, jb) = + rbf_vec_coeff_c_view(0, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk, + rbf_vec_blk_c_view(0, jc, jb)) + + rbf_vec_coeff_c_view(1, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk, + rbf_vec_blk_c_view(1, jc, jb)) + + rbf_vec_coeff_c_view(2, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk, + rbf_vec_blk_c_view(2, jc, jb)) + + rbf_vec_coeff_c_view(3, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk, + rbf_vec_blk_c_view(3, jc, jb)) + + rbf_vec_coeff_c_view(4, 1, jc, jb) * + 
p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk, + rbf_vec_blk_c_view(4, jc, jb)) + + rbf_vec_coeff_c_view(5, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk, + rbf_vec_blk_c_view(5, jc, jb)) + + rbf_vec_coeff_c_view(6, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk, + rbf_vec_blk_c_view(6, jc, jb)) + + rbf_vec_coeff_c_view(7, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk, + rbf_vec_blk_c_view(7, jc, jb)) + + rbf_vec_coeff_c_view(8, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, + rbf_vec_blk_c_view(8, jc, jb)); + + p_v_out_view(jc, jk, jb) = + rbf_vec_coeff_c_view(0, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk, + rbf_vec_blk_c_view(0, jc, jb)) + + rbf_vec_coeff_c_view(1, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk, + rbf_vec_blk_c_view(1, jc, jb)) + + rbf_vec_coeff_c_view(2, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk, + rbf_vec_blk_c_view(2, jc, jb)) + + rbf_vec_coeff_c_view(3, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk, + rbf_vec_blk_c_view(3, jc, jb)) + + rbf_vec_coeff_c_view(4, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk, + rbf_vec_blk_c_view(4, jc, jb)) + + rbf_vec_coeff_c_view(5, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk, + rbf_vec_blk_c_view(5, jc, jb)) + + rbf_vec_coeff_c_view(6, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk, + rbf_vec_blk_c_view(6, jc, jb)) + + rbf_vec_coeff_c_view(7, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk, + rbf_vec_blk_c_view(7, jc, jb)) + + rbf_vec_coeff_c_view(8, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, + rbf_vec_blk_c_view(8, jc, jb)); + }); + Kokkos::fence(); + } // for +} // void + +//------------------------------------------rbf_vec_interpol_edge_lib--------------------------------------------- + +template <typename T> +void rbf_vec_interpol_edge_lib(const T *p_vn_in, const int *rbf_vec_idx_e, + const 
int *rbf_vec_blk_e, + const T *rbf_vec_coeff_e, T *p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nlev, + int nproma, int rbf_vec_dim_e, int nblks_e, + bool lacc, bool acc_async) { + + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e); + UnmanagedConstInt3D rbf_vec_idx_e_view(rbf_vec_idx_e, rbf_vec_dim_e, nproma, + nblks_e); + UnmanagedConstInt3D rbf_vec_blk_e_view(rbf_vec_blk_e, rbf_vec_dim_e, nproma, + nblks_e); + UnmanagedConstT3D rbf_vec_coeff_e_view(rbf_vec_coeff_e, rbf_vec_dim_e, nproma, + nblks_e); + UnmanagedT3D p_vt_out_view(p_vt_out, nproma, nlev, nblks_e); + + for (int jb = i_startblk; jb <= i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "rbf_vec_interpol_edge_lib", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int je) { + p_vt_out_view(je, jk, jb) = + rbf_vec_coeff_e_view(0, je, jb) * + p_vn_in_view(rbf_vec_idx_e_view(0, je, jb), jk, + rbf_vec_blk_e_view(0, je, jb)) + + rbf_vec_coeff_e_view(1, je, jb) * + p_vn_in_view(rbf_vec_idx_e_view(1, je, jb), jk, + rbf_vec_blk_e_view(1, je, jb)) + + rbf_vec_coeff_e_view(2, je, jb) * + p_vn_in_view(rbf_vec_idx_e_view(2, je, jb), jk, + rbf_vec_blk_e_view(2, je, jb)) + + rbf_vec_coeff_e_view(3, je, jb) * + p_vn_in_view(rbf_vec_idx_e_view(3, je, jb), jk, + rbf_vec_blk_e_view(3, je, jb)); + }); + } +} + +// Explicit instantiation - double precision +template void rbf_vec_interpol_vertex_lib<double, double>( + 
const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const double *rbf_vec_coeff_v, double *p_u_out, double *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v); + +// Explicit instantiation - single precision +template void rbf_vec_interpol_vertex_lib<float, float>( + const float *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const float *rbf_vec_coeff_v, float *p_u_out, float *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v); + +// Explicit instantiation - mixed precision +template void rbf_vec_interpol_vertex_lib<double, float>( + const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const double *rbf_vec_coeff_v, float *p_u_out, float *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v); + +template void rbf_vec_interpol_cell_lib<double>( + const double *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c, + const double *rbf_vec_coeff_c, double *p_u_out, double *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, + bool lacc, bool acc_async); + +template void rbf_vec_interpol_cell_lib<float>( + const float *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c, + const float *rbf_vec_coeff_c, float *p_u_out, float *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int 
i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, + bool lacc, bool acc_async); + +template void rbf_interpol_c2grad_lib<double>( + const double *p_cell_in, const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, const double *rbf_c2grad_coeff, double *grad_x, + double *grad_y, int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim, + int nlev, int nblks_c, bool lacc); + +template void rbf_interpol_c2grad_lib<float>( + const float *p_cell_in, const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, const float *rbf_c2grad_coeff, float *grad_x, + float *grad_y, int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim, + int nlev, int nblks_c, bool lacc); + +template void rbf_vec_interpol_edge_lib<double>( + const double *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e, + const double *rbf_vec_coeff_e, double *p_vt_out, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, + int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async); + +template void rbf_vec_interpol_edge_lib<float>( + const float *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e, + const float *rbf_vec_coeff_e, float *p_vt_out, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nlev, + int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, bool acc_async); diff --git a/src/interpolation/mo_lib_intp_rbf.hpp b/src/interpolation/mo_lib_intp_rbf.hpp new file mode 100644 index 0000000..8a85502 --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf.hpp @@ -0,0 +1,50 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See 
LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- +#pragma once + +#include "mo_lib_loopindices.hpp" +#include <Kokkos_Core.hpp> +#include <vector> + +template <typename T, typename S> +void rbf_vec_interpol_vertex_lib( + const T *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const T *rbf_vec_coeff_v, S *p_u_out, S *p_v_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const bool lacc, + const bool acc_async, const int nlev, const int nblks_e, const int nblks_c); + +template <typename T> +void rbf_interpol_c2grad_lib(const T *p_cell_in, const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, + const T *rbf_c2grad_coeff, T *grad_x, T *grad_y, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + int rbf_c2grad_dim, int nlev, int nblks_c, + bool lacc); + +template <typename T> +void rbf_vec_interpol_cell_lib(const T *p_vn_in, const int *rbf_vec_idx_c, + const int *rbf_vec_blk_c, + const T *rbf_vec_coeff_c, T *p_u_out, T *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + int nlev, int nblks_c, int nblks_e, + int rbf_vec_dim_c, bool lacc, bool acc_async); + +template <typename T> +void rbf_vec_interpol_edge_lib(const T *p_vn_in, const int *rbf_vec_idx_e, + const int *rbf_vec_blk_e, + const T *rbf_vec_coeff_e, T *p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nlev, + int nproma, int rbf_vec_dim_e, int nblks_e, + bool lacc, bool acc_async); diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp index 0ee7fa3..507ec3f 100644 --- a/test/c/test_interpolation_scalar.cpp +++ b/test/c/test_interpolation_scalar.cpp @@ -48,8 +48,7 @@ public: static constexpr int nlev = 7; 
// number of vertical levels static constexpr int nblks_c = 2; // number of cell blocks static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) - static constexpr int nblks_v = - 2; // number of vertex blocks (for rbf arrays and outputs) + static constexpr int nblks_v = 2; // number of vertex blocks // Parameter values. const int i_startblk = 0; @@ -385,7 +384,7 @@ TYPED_TEST_SUITE(InterpolationScalarMixedTestFixture, MixedTypesSP2DP); // //////////////////////////////////////////////////////////////////////////////// -TYPED_TEST(InterpolationScalarMixedTestFixture, cells2edges) { +TYPED_TEST(InterpolationScalarMixedTestFixture, Cells2Edges) { using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; @@ -424,7 +423,7 @@ TYPED_TEST(InterpolationScalarMixedTestFixture, cells2edges) { // //////////////////////////////////////////////////////////////////////////////// -TYPED_TEST(InterpolationScalarMixedTestFixture, cells2verts) { +TYPED_TEST(InterpolationScalarMixedTestFixture, Cells2Verts) { using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; @@ -496,7 +495,7 @@ public: TYPED_TEST_SUITE(Cells2vertsriScalarLibTestFixture, MixedTypes); // Add test -TYPED_TEST(Cells2vertsriScalarLibTestFixture, cells2verts_ri) { +TYPED_TEST(Cells2vertsriScalarLibTestFixture, Cells2VertsRI) { using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp index 0aa4f9b..040d440 100644 --- a/test/c/test_intp_rbf.cpp +++ b/test/c/test_intp_rbf.cpp @@ -9,117 +9,295 @@ // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- -#include <gtest/gtest.h> +#include "mo_lib_intp_rbf.hpp" #include <Kokkos_Core.hpp> +#include <algorithm> +#include <gtest/gtest.h> +#include <numeric> #include <vector> -#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" // 
Free-function helpers for 3D and 4D array sizes (assumed column-major) -template<typename T> -size_t num_elements_3d(int d1, int d2, int d3) { +template <typename T> size_t num_elements_3d(int d1, int d2, int d3) { return static_cast<size_t>(d1) * d2 * d3; } -template<typename T> -size_t num_elements_4d(int d1, int d2, int d3, int d4) { +template <typename T> size_t num_elements_4d(int d1, int d2, int d3, int d4) { return static_cast<size_t>(d1) * d2 * d3 * d4; } // Define a helper struct that holds the two types. -template<typename InT, typename OutT> -struct MixedPrecision { - using in_type = InT; +template <typename InT, typename OutT> struct MixedPrecision { + using in_type = InT; using out_type = OutT; }; // Define the list of type pairs we want to test. -typedef ::testing::Types< MixedPrecision<double, double>, - MixedPrecision<double, float>, - MixedPrecision<float, float> > MixedTypes; +typedef ::testing::Types<MixedPrecision<double, double>, + MixedPrecision<double, float>, + MixedPrecision<float, float>> + MixedTypes; + +class interp_dimensions { +public: + // Constant dimensions. + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 4; // number of vertical levels + static constexpr int nblks_c = 2; // number of cell blocks + static constexpr int nblks_e = 2; // number of edge blocks + static constexpr int nblks_v = 2; // number of vertex blocks + static constexpr int rbf_c2grad_dim = 10; // fixed dimension + static constexpr int rbf_vec_dim_c = 9; + static constexpr int rbf_vec_dim_e = 4; + + // Parameter values. + const int i_startblk = 0; + const int i_endblk = 1; // Test blocks [0, 1] + const int i_startidx_in = 0; + const int i_endidx_in = nproma - 1; + const int slev = 0; + const int elev = nlev - 1; + const bool lacc = false; // Not using ACC-specific behavior. + const bool acc_async = false; // No asynchronous execution. 
+}; + +// Define a typed test fixture for the functions which have the same input and +// output types +template <typename T> +class RbfInterpolTypedTestFixture : public ::testing::Test, + public interp_dimensions { +public: + // Data arrays. + std::vector<T> p_cell_in; // size: nproma * nlev * nblks_c + std::vector<int> rbf_c2grad_idx; // size: rbf_c2grad_dim * nproma * nblks_c + std::vector<int> rbf_c2grad_blk; // size: rbf_c2grad_dim * nproma * nblks_c + std::vector<int> rbf_vec_idx_c; // size: rbf_vec_dim_c * nproma * nblks_c + std::vector<int> rbf_vec_blk_c; // size: rbf_vec_dim_c * nproma * nblks_c + std::vector<T> + rbf_c2grad_coeff; // size: rbf_c2grad_dim * 2 * nproma * nblks_c + std::vector<T> grad_x; // size: nproma * nlev * nblks_c + std::vector<T> grad_y; // size: nproma * nlev * nblks_c + std::vector<T> p_vn_in; + std::vector<T> rbf_vec_coeff_c; + std::vector<T> p_u_out; + std::vector<T> p_v_out; + + std::vector<int> rbf_vec_idx_e; + std::vector<int> rbf_vec_blk_e; + std::vector<T> rbf_vec_coeff_e; + std::vector<T> p_vt_out; + + RbfInterpolTypedTestFixture() { + size_t size3d = static_cast<size_t>(nproma) * nlev * nblks_c; + size_t size3d_idx = static_cast<size_t>(rbf_c2grad_dim) * nproma * nblks_c; + size_t size4d = static_cast<size_t>(rbf_c2grad_dim) * 2 * nproma * nblks_c; + + size_t size3d_vec_dim = + static_cast<size_t>(rbf_vec_dim_c) * nproma * nblks_c; + size_t size_4d_vec_dim = + static_cast<size_t>(rbf_vec_dim_c) * 2 * nproma * nblks_c; + + size_t size3d_edge_lib = + static_cast<size_t>(rbf_vec_dim_e) * nproma * nblks_c; + size_t size_4d_edge_lib = + static_cast<size_t>(rbf_vec_dim_e) * 2 * nproma * nblks_c; + + p_cell_in.resize(size3d, static_cast<T>(1)); + p_vn_in.resize(size3d, static_cast<T>(1)); + + rbf_vec_idx_c.resize(size3d_vec_dim, 1); + rbf_vec_blk_c.resize(size3d_vec_dim, 0); + rbf_c2grad_idx.resize(size3d_idx, 1); + rbf_c2grad_blk.resize(size3d_idx, 0); // Set block indices to 0 for testing. 
+ rbf_vec_idx_e.resize(size3d_vec_dim, 1); + rbf_vec_blk_e.resize(size3d_vec_dim, 0); + + rbf_vec_coeff_c.resize(size_4d_vec_dim, static_cast<T>(1)); + rbf_c2grad_coeff.resize(size4d, static_cast<T>(1)); + rbf_vec_coeff_e.resize(size_4d_edge_lib, static_cast<T>(1)); + + p_u_out.resize(size3d_vec_dim, static_cast<T>(0)); + p_v_out.resize(size3d_vec_dim, static_cast<T>(0)); + p_vt_out.resize(size3d_edge_lib, static_cast<T>(0)); + + grad_x.resize(size3d, static_cast<T>(0)); + grad_y.resize(size3d, static_cast<T>(0)); + } +}; + +typedef ::testing::Types<float, double> MyTypes; + +TYPED_TEST_SUITE(RbfInterpolTypedTestFixture, MyTypes); + +TYPED_TEST(RbfInterpolTypedTestFixture, C2Grad) { + using T = TypeParam; + rbf_interpol_c2grad_lib<TypeParam>( + this->p_cell_in.data(), this->rbf_c2grad_idx.data(), + this->rbf_c2grad_blk.data(), this->rbf_c2grad_coeff.data(), + this->grad_x.data(), this->grad_y.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->rbf_c2grad_dim, this->nlev, this->nblks_c, + this->lacc); -// Define a typed test fixture. + // For each block from i_startblk to i_endblk-1, and for each (i, level) + // the kernel sums rbf_c2grad_dim contributions, each equal to 1. + // Therefore, we expect grad_x and grad_y to equal rbf_c2grad_dim (i.e., 10). 
+ for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + for (int jk = 0; jk < this->nlev; ++jk) { + for (int i = 0; i < this->nproma; ++i) { + size_t idx = i + static_cast<size_t>(jk) * this->nproma + + static_cast<size_t>(jb) * this->nproma * this->nlev; + EXPECT_NEAR(this->grad_x[idx], + static_cast<TypeParam>(this->rbf_c2grad_dim), + static_cast<TypeParam>(1e-5)) + << "grad_x failure at block " << jb << ", level " << jk + << ", index " << i; + EXPECT_NEAR(this->grad_y[idx], + static_cast<TypeParam>(this->rbf_c2grad_dim), + static_cast<TypeParam>(1e-5)) + << "grad_y failure at block " << jb << ", level " << jk + << ", index " << i; + } + } + } +} + +TYPED_TEST(RbfInterpolTypedTestFixture, Cell) { + using T = TypeParam; + + rbf_vec_interpol_cell_lib<T>( + this->p_vn_in.data(), this->rbf_vec_idx_c.data(), + this->rbf_vec_blk_c.data(), this->rbf_vec_coeff_c.data(), + this->p_u_out.data(), this->p_v_out.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->rbf_c2grad_dim, this->nlev, this->nblks_c, + this->nblks_e, this->lacc, this->acc_async); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + for (int jk = 0; jk < this->nlev; ++jk) { + for (int i = 0; i < this->nproma; ++i) { + size_t idx = i + static_cast<size_t>(jk) * this->nproma + + static_cast<size_t>(jb) * this->nproma * this->nlev; + EXPECT_NEAR(this->p_u_out[idx], static_cast<T>(this->rbf_vec_dim_c), + static_cast<T>(1e-5)) + << "p_u_out failure at block " << jb << ", level " << jk + << ", index " << i; + } + } + } +} + +TYPED_TEST(RbfInterpolTypedTestFixture, Edge) { + using T = TypeParam; + + rbf_vec_interpol_edge_lib<T>( + this->p_vn_in.data(), this->rbf_vec_idx_e.data(), + this->rbf_vec_blk_e.data(), this->rbf_vec_coeff_e.data(), + this->p_vt_out.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev, this->elev, + this->nlev, this->nproma, this->rbf_vec_dim_e, 
this->nblks_e, this->lacc, + this->acc_async); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + for (int jk = 0; jk < this->nlev; ++jk) { + for (int i = 0; i < this->nproma; ++i) { + size_t idx = i + static_cast<size_t>(jk) * this->nproma + + static_cast<size_t>(jb) * this->nproma * this->nlev; + EXPECT_NEAR(this->p_vt_out[idx], static_cast<T>(this->rbf_vec_dim_e), + static_cast<T>(1e-5)) + << "p_vt_out failure at block " << jb << ", level " << jk + << ", index " << i; + } + } + } +} + +// Define a typed test fixture for the functions which have different input and +// output types template <typename TypePair> -class RbfVecInterpolVertexMixedTestFixture : public ::testing::Test { +class RbfVecInterpolMixedTestFixture : public ::testing::Test, + public interp_dimensions { public: - using InType = typename TypePair::in_type; + using InType = typename TypePair::in_type; using OutType = typename TypePair::out_type; // Constant dimensions. - static constexpr int nproma = 3; // inner loop length - static constexpr int nlev = 4; // number of vertical levels - static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) - static constexpr int nblks_v = 2; // number of vertex blocks (for rbf arrays and outputs) - static constexpr int rbf_vec_dim = 6; // fixed dimension for rbf vector (stencil points) + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 4; // number of vertical levels + static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) + static constexpr int nblks_v = + 2; // number of vertex blocks (for rbf arrays and outputs) + static constexpr int rbf_vec_dim = + 6; // fixed dimension for rbf vector (stencil points) // Parameter values. - int i_startblk = 0; - int i_endblk = 1; // Test blocks [0, 1] + int i_startblk = 0; + int i_endblk = 1; // Test blocks [0, 1] int i_startidx_in = 0; - int i_endidx_in = nproma - 1; // Full range: 0 .. 
nproma-1 - int slev = 0; - int elev = nlev - 1; // Full vertical range (0 .. nlev-1) - bool lacc = false; // Not using ACC-specific behavior. - bool acc_async = false; // No asynchronous execution. + int i_endidx_in = nproma - 1; // Full range: 0 .. nproma-1 + int slev = 0; + int elev = nlev - 1; // Full vertical range (0 .. nlev-1) + bool lacc = false; // Not using ACC-specific behavior. + bool acc_async = false; // No asynchronous execution. // Arrays stored in std::vector. - std::vector<InType> p_e_in; // Dimensions: (nproma, nlev, nblks_e) - std::vector<int> rbf_vec_idx_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) - std::vector<int> rbf_vec_blk_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) - std::vector<InType> rbf_vec_coeff_v; // Dimensions: (rbf_vec_dim, 2, nproma, nblks_v) - std::vector<OutType> p_u_out; // Dimensions: (nproma, nlev, nblks_v) - std::vector<OutType> p_v_out; // Dimensions: (nproma, nlev, nblks_v) - - RbfVecInterpolVertexMixedTestFixture() { + std::vector<InType> p_e_in; // Dimensions: (nproma, nlev, nblks_e) + std::vector<int> rbf_vec_idx_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) + std::vector<int> rbf_vec_blk_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) + std::vector<InType> + rbf_vec_coeff_v; // Dimensions: (rbf_vec_dim, 2, nproma, nblks_v) + std::vector<OutType> p_u_out; // Dimensions: (nproma, nlev, nblks_v) + std::vector<OutType> p_v_out; // Dimensions: (nproma, nlev, nblks_v) + + RbfVecInterpolMixedTestFixture() { // Allocate and initialize inputs. 
- p_e_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1)); + p_e_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), + static_cast<InType>(1)); rbf_vec_idx_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 1); rbf_vec_blk_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 0); - rbf_vec_coeff_v.resize(num_elements_4d<InType>(rbf_vec_dim, 2, nproma, nblks_v), static_cast<InType>(1)); + rbf_vec_coeff_v.resize( + num_elements_4d<InType>(rbf_vec_dim, 2, nproma, nblks_v), + static_cast<InType>(1)); // Allocate output arrays and initialize to zero. - p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); - p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); + p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), + static_cast<OutType>(0)); + p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), + static_cast<OutType>(0)); } }; -TYPED_TEST_SUITE(RbfVecInterpolVertexMixedTestFixture, MixedTypes); +TYPED_TEST_SUITE(RbfVecInterpolMixedTestFixture, MixedTypes); -TYPED_TEST(RbfVecInterpolVertexMixedTestFixture, BasicTest) { - using InType = typename TestFixture::InType; +TYPED_TEST(RbfVecInterpolMixedTestFixture, Vertex) { + using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; // Call the function with mixed precision. 
rbf_vec_interpol_vertex_lib<InType, OutType>( - this->p_e_in.data(), - this->rbf_vec_idx_v.data(), - this->rbf_vec_blk_v.data(), - this->rbf_vec_coeff_v.data(), - this->p_u_out.data(), - this->p_v_out.data(), - this->i_startblk, - this->i_endblk, - this->i_startidx_in, - this->i_endidx_in, - this->slev, - this->elev, - this->nproma, - this->lacc, - this->acc_async, - this->nlev, - RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_e, - RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_v); + this->p_e_in.data(), this->rbf_vec_idx_v.data(), + this->rbf_vec_blk_v.data(), this->rbf_vec_coeff_v.data(), + this->p_u_out.data(), this->p_v_out.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->lacc, this->acc_async, this->nlev, + this->nblks_e, this->nblks_v); // Check the outputs only for blocks in the range [i_startblk, i_endblk]. for (int block = this->i_startblk; block <= this->i_endblk; ++block) { for (int level = 0; level < this->nlev; ++level) { for (int i = 0; i < this->nproma; ++i) { // Compute the linear index for a 3D array in column-major order: - size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 6 stencil points, expect 6. - EXPECT_NEAR(this->p_u_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " << i; - EXPECT_NEAR(this->p_v_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " << i; + size_t idx = + i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 6 stencil points, + // expect 6. 
+ EXPECT_NEAR(this->p_u_out[idx], static_cast<OutType>(6), + static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; + EXPECT_NEAR(this->p_v_out[idx], static_cast<OutType>(6), + static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; } } } -- GitLab From 44745ae6069c9874de2d56b839fcf1295217fef7 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Mon, 17 Mar 2025 13:40:10 +0000 Subject: [PATCH 40/76] Implement C++ code for divrot (icon-libraries/libiconmath!33) ## What is the new feature Implement C++ functions for the Fortran `mo_divrot` module ## How is it implemented This first version is implemented using simple templated functions with Kokkos Co-authored-by: Pradipta Samanta <samanta@dkrz.de> Merged-by: Pradipta Samanta <samanta@dkrz.de> Changelog: feature --- _typos.toml | 2 + src/horizontal/CMakeLists.txt | 3 +- src/horizontal/mo_lib_divrot.cpp | 1359 ++++++++++++++++++++++++++++++ src/horizontal/mo_lib_divrot.hpp | 130 +++ src/types.hpp | 16 + test/c/CMakeLists.txt | 6 + test/c/dim_helper.hpp | 88 ++ test/c/test_horizontal_div.cpp | 1070 +++++++++++++++++++++++ test/c/test_horizontal_recon.cpp | 1199 ++++++++++++++++++++++++++ test/c/test_horizontal_rot.cpp | 378 +++++++++ 10 files changed, 4250 insertions(+), 1 deletion(-) create mode 100644 src/horizontal/mo_lib_divrot.cpp create mode 100644 src/horizontal/mo_lib_divrot.hpp create mode 100644 src/types.hpp create mode 100644 test/c/dim_helper.hpp create mode 100644 test/c/test_horizontal_div.cpp create mode 100644 test/c/test_horizontal_recon.cpp create mode 100644 test/c/test_horizontal_rot.cpp diff --git a/_typos.toml b/_typos.toml index 4fe4968..8de4a86 100644 --- a/_typos.toml +++ b/_typos.toml @@ -1,6 +1,7 @@ [default] extend-ignore-re = [ ".*_pn", + "f4dout_*", ] extend-ignore-words-re = [ "Comput", @@ -10,6 +11,7 @@ extend-ignore-words-re = [ Wirth = "Wirth" # author name nin = 
"nin" # number of inputs Pilar = "Pilar" # author name +Comput = "Comput" # abbreviation for Computational [default.extend-identifiers] f4dout = "f4dout" # file name diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt index 078a14d..f3b75c0 100644 --- a/src/horizontal/CMakeLists.txt +++ b/src/horizontal/CMakeLists.txt @@ -11,6 +11,7 @@ add_library( iconmath-horizontal + mo_lib_divrot.cpp mo_lib_divrot.F90 mo_lib_laplace.F90 mo_lib_gradients.F90) @@ -57,7 +58,7 @@ target_include_directories( # Path to the internal C/C++ headers (for testing): Requires CMake 3.15+ for # multiple compile languages # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html - $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_SOURCE_DIR}>> + $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${PROJECT_SOURCE_DIR}/src>> PRIVATE # Path to config.h (for C and C++ only): Requires CMake 3.15+ for multiple # compile languages diff --git a/src/horizontal/mo_lib_divrot.cpp b/src/horizontal/mo_lib_divrot.cpp new file mode 100644 index 0000000..d086e8b --- /dev/null +++ b/src/horizontal/mo_lib_divrot.cpp @@ -0,0 +1,1359 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include <iostream> +#include <vector> + +#include <horizontal/mo_lib_divrot.hpp> +#include <support/mo_lib_loopindices.hpp> + +template <typename T> +void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const T *lsq_qtmat_c, + const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, + const T *lsq_moments, T *p_coeff, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int 
elev, int nproma, bool l_consv, bool lacc, + bool acc_async, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + Kokkos::View<T *> z_d("z_d", lsq_dim_c); + Kokkos::View<T *> z_qt_times_d("z_qt_times_d", lsq_dim_unk); + + UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, lsq_dim_c); + UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, lsq_dim_c); + + UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); + + UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, + lsq_dim_c, nblks_c); + UnmanagedConstT3D lsq_rmat_rdiag_c_view(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, + nblks_c); + UnmanagedConstT3D lsq_rmat_utri_c_view( + lsq_rmat_utri_c, nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, + nblks_c); + UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "recon_lsq_cell_l_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_d(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); + z_d(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_d(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 
2)) - + p_cc_view(jc, jk, jb); + // matrix multiplication Q^T d (partitioned into 2 dot products) + z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0) + + lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1) + + lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2); + z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0) + + lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1) + + lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2); + + p_coeff_view(2, jc, jk, jb) = + lsq_rmat_rdiag_c_view(jc, 1, jb) * z_qt_times_d(1); + p_coeff_view(1, jc, jk, jb) = + lsq_rmat_rdiag_c_view(jc, 0, jb) * + (z_qt_times_d(0) - + lsq_rmat_utri_c_view(jc, 0, jb) * p_coeff_view(2, jc, jk, jb)); + p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb); + }); + if (l_consv) { + Kokkos::parallel_for( + "recon_lsq_cell_l_consv", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + p_coeff_view(0, jc, jk, jb) = + p_coeff_view(0, jc, jk, jb) - + p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1); + }); + } + } + + if (!acc_async) + Kokkos::fence(); +} + +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_L); + +template <typename T> +void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const T *lsq_pseudoinv, + const T *lsq_moments, T *p_coeff, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, bool l_consv, + bool lacc, bool acc_async, int nblks_c, int nlev, + int lsq_dim_unk, int lsq_dim_c) { + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + Kokkos::View<T *> z_b("z_b", lsq_dim_c); + + UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, lsq_dim_c); + UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, lsq_dim_c); + + UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); + + UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, + lsq_dim_c, nblks_c); + UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "recon_lsq_cell_l_svd_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_b(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); + z_b(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_b(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + + p_coeff_view(2, jc, jk, jb) = + lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2); + p_coeff_view(1, jc, jk, jb) = + lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2); + p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb); + }); + if (l_consv) { 
+ Kokkos::parallel_for( + "recon_lsq_cell_l_svd_consv", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + p_coeff_view(0, jc, jk, jb) = + p_coeff_view(0, jc, jk, jb) - + p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1); + }); + } + } + + if (!acc_async) + Kokkos::fence(); +} + +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD); + +template <typename T> +void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const T *lsq_qtmat_c, const T *lsq_rmat_rdiag_c, + const T *lsq_rmat_utri_c, const T *lsq_moments, + T *p_coeff, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, + int nproma, int patch_id, bool l_limited_area, bool lacc, + int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + Kokkos::View<T ***> z_d("z_d", lsq_dim_c, nproma, nlev); + Kokkos::View<T *> z_qt_times_d("z_qt_times_d", lsq_dim_unk); + + UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); + UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); + + UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); + + UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, + lsq_dim_c, nblks_c); + UnmanagedConstT3D ptr_rrdiag(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, nblks_c); + UnmanagedConstT3D ptr_rutri(lsq_rmat_utri_c, nproma, + (lsq_dim_unk * lsq_dim_unk - 
lsq_dim_unk) / 2, + nblks_c); + UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); + + if (patch_id > 0 || l_limited_area) { + Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( + {0, i_startidx_in, slev, i_startblk}, + {lsq_dim_unk + 1, i_endidx_in, elev, i_endblk}); + Kokkos::parallel_for( + "recon_lsq_cell_q_init", initPolicy, + KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) { + p_coeff_view(ji, jc, jk, jb) = 0; + }); + } + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "recon_lsq_cell_q_step1", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_d(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); + z_d(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_d(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + z_d(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + p_cc_view(jc, jk, jb); + z_d(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - + p_cc_view(jc, jk, jb); + z_d(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - + p_cc_view(jc, jk, jb); + z_d(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - + p_cc_view(jc, jk, jb); + z_d(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - + p_cc_view(jc, jk, jb); + z_d(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - + p_cc_view(jc, jk, jb); + }); + Kokkos::parallel_for( + "recon_lsq_cell_q_step2", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 0, 2, jb) * 
z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, 
jc, jk) + + lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk); + + p_coeff_view(5, jc, jk, jb) = ptr_rrdiag(jc, 4, jb) * z_qt_times_d(4); + p_coeff_view(4, jc, jk, jb) = + ptr_rrdiag(jc, 3, jb) * + (z_qt_times_d(3) - + ptr_rutri(jc, 0, jb) * p_coeff_view(5, jc, jk, jb)); + p_coeff_view(3, jc, jk, jb) = + ptr_rrdiag(jc, 2, jb) * + (z_qt_times_d(2) - + ptr_rutri(jc, 1, jb) * p_coeff_view(4, jc, jk, jb) - + ptr_rutri(jc, 2, jb) * p_coeff_view(5, jc, jk, jb)); + p_coeff_view(2, jc, jk, jb) = + ptr_rrdiag(jc, 1, jb) * + (z_qt_times_d(1) - + ptr_rutri(jc, 3, jb) * p_coeff_view(3, jc, jk, jb) - + ptr_rutri(jc, 4, jb) * p_coeff_view(4, jc, jk, jb) - + ptr_rutri(jc, 5, jb) * p_coeff_view(5, jc, jk, jb)); + p_coeff_view(1, jc, jk, jb) = + ptr_rrdiag(jc, 0, jb) * + (z_qt_times_d(0) - + ptr_rutri(jc, 6, jb) * p_coeff_view(2, jc, jk, jb) - + ptr_rutri(jc, 7, jb) * p_coeff_view(3, jc, jk, jb) - + ptr_rutri(jc, 8, jb) * p_coeff_view(4, jc, jk, jb) - + ptr_rutri(jc, 9, jb) * p_coeff_view(5, jc, jk, jb)); + p_coeff_view(0, jc, jk, jb) = + p_cc_view(jc, jk, jb) - + p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - + p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) - + p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) - + p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4); + }); + } + + Kokkos::fence(); +} + +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_Q);

/// @brief Computes the six coefficients p_coeff(0..5, jc, jk, jb) from the
///        differences between p_cc at nine stencil cells and p_cc at the cell
///        itself, using the precomputed matrix lsq_pseudoinv.
///
/// For every block jb in [i_startblk, i_endblk), level jk in [slev, elev) and
/// cell jc in the range returned by get_indices_c_lib:
///   z_b(js)       = p_cc(iidx(jc,jb,js), jk, iblk(jc,jb,js)) - p_cc(jc,jk,jb)
///   p_coeff(ju+1) = sum_js lsq_pseudoinv(jc,ju,js,jb) * z_b(js),   ju = 0..4
///   p_coeff(0)    = p_cc(jc,jk,jb) - sum_ju p_coeff(ju+1) * lsq_moments(jc,jb,ju)
///
/// All pointers are interpreted as column-major (LayoutLeft) arrays:
/// @param p_cc          (nproma, nlev, nblks_c) input field.
/// @param lsq_idx_c     (nproma, nblks_c, lsq_dim_c) stencil cell indices.
/// @param lsq_blk_c     (nproma, nblks_c, lsq_dim_c) stencil block indices.
/// @param lsq_pseudoinv (nproma, lsq_dim_unk, lsq_dim_c, nblks_c) matrix.
/// @param lsq_moments   (nproma, nblks_c, lsq_dim_unk) moments.
/// @param p_coeff       (lsq_dim_unk + 1, nproma, nlev, nblks_c) output.
/// @param patch_id, l_limited_area  if patch_id > 0 or l_limited_area is set,
///        p_coeff is zeroed over the requested index box before the update.
/// @param lacc          not used by this implementation.
///
/// The stencil size (9) and number of unknowns (5) are hard-coded, so
/// lsq_dim_c >= 9 and lsq_dim_unk >= 5 are required.
/// The function fences before returning.
template <typename T>
void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c,
                          const int *lsq_blk_c, const T *lsq_pseudoinv,
                          const T *lsq_moments, T *p_coeff, int i_startblk,
                          int i_endblk, int i_startidx_in, int i_endidx_in,
                          int slev, int elev, int nproma, int patch_id,
                          bool l_limited_area, bool lacc, int nblks_c, int nlev,
                          int lsq_dim_unk, int lsq_dim_c) {
  // Wrap raw pointers in unmanaged Kokkos Views.
  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
      UnmanagedConstT3D;
  typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
                       Kokkos::MemoryUnmanaged>
      UnmanagedConstT4D;
  typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
      UnmanagedT4D;
  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
                       Kokkos::MemoryUnmanaged>
      UnmanagedConstInt3D;

  // Scratch storage with one slot per (stencil cell, jc, jk); every parallel
  // iteration touches only its own (jc, jk) column.
  Kokkos::View<T ***> z_b("z_b", lsq_dim_c, nproma, elev);

  UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c);
  UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c);

  UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c);
  UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c);

  UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk,
                                       lsq_dim_c, nblks_c);
  UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk);

  if (patch_id > 0 || l_limited_area) {
    Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy(
        {0, i_startidx_in, slev, i_startblk},
        {lsq_dim_unk + 1, i_endidx_in, elev, i_endblk});
    Kokkos::parallel_for(
        "recon_lsq_cell_q_svd_init", initPolicy,
        KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) {
          p_coeff_view(ji, jc, jk, jb) = 0;
        });
  }

  for (int jb = i_startblk; jb < i_endblk; ++jb) {
    int i_startidx, i_endidx;
    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
                      i_endblk, i_startidx, i_endidx);

    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
                                                       {elev, i_endidx});

    // Step 1: differences between the stencil cells and the cell itself.
    Kokkos::parallel_for(
        "recon_lsq_cell_q_svd_step1", innerPolicy,
        KOKKOS_LAMBDA(const int jk, const int jc) {
          constexpr int n_stencil = 9;
          for (int js = 0; js < n_stencil; ++js) {
            z_b(js, jc, jk) =
                p_cc_view(iidx(jc, jb, js), jk, iblk(jc, jb, js)) -
                p_cc_view(jc, jk, jb);
          }
        });

    // Step 2: matrix-vector product and constant coefficient.
    Kokkos::parallel_for(
        "recon_lsq_cell_q_svd_step2", innerPolicy,
        KOKKOS_LAMBDA(const int jk, const int jc) {
          constexpr int n_stencil = 9;
          constexpr int n_unk = 5;

          for (int ju = n_unk - 1; ju >= 0; --ju) {
            // Accumulate left to right, starting from the first product, so
            // the rounding matches a fully written-out sum.
            T acc = lsq_pseudoinv_view(jc, ju, 0, jb) * z_b(0, jc, jk);
            for (int js = 1; js < n_stencil; ++js) {
              acc += lsq_pseudoinv_view(jc, ju, js, jb) * z_b(js, jc, jk);
            }
            p_coeff_view(ju + 1, jc, jk, jb) = acc;
          }

          // Successive subtraction, in ascending coefficient order.
          T c0 = p_cc_view(jc, jk, jb);
          for (int ju = 0; ju < n_unk; ++ju) {
            c0 -= p_coeff_view(ju + 1, jc, jk, jb) *
                  lsq_moments_view(jc, jb, ju);
          }
          p_coeff_view(0, jc, jk, jb) = c0;
        });
  }

  // Kernels may run asynchronously; wait so the caller can read p_coeff.
  Kokkos::fence();
}

ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD);

/// @brief Computes the ten coefficients p_coeff(0..9, jc, jk, jb) from nine
///        stencil differences using a stored Q^T matrix (lsq_qtmat_c) and a
///        stored upper-triangular system (lsq_rmat_rdiag_c, lsq_rmat_utri_c).
///
/// For every block jb in [i_startblk, i_endblk), level jk in [slev, elev) and
/// cell jc in the range returned by get_indices_c_lib:
///   z_d(js)      = p_cc(iidx(jc,jb,js), jk, iblk(jc,jb,js)) - p_cc(jc,jk,jb)
///   qtd(ju)      = sum_js lsq_qtmat_c(jc,ju,js,jb) * z_d(js),      ju = 0..8
///   p_coeff(9)   = rdiag(8) * qtd(8)
///   p_coeff(ju+1)= rdiag(ju) * (qtd(ju) - sum_m utri(first+m) * p_coeff(ju+2+m))
///                  for ju = 7..0, with 8-ju terms and
///                  first = (8-ju)*(8-ju-1)/2
///   p_coeff(0)   = p_cc(jc,jk,jb) - sum_ju p_coeff(ju+1) * lsq_moments(jc,jb,ju)
///
/// All pointers are interpreted as column-major (LayoutLeft) arrays:
/// @param p_cc             (nproma, nlev, nblks_c) input field.
/// @param lsq_idx_c        (nproma, nblks_c, lsq_dim_c) stencil cell indices.
/// @param lsq_blk_c        (nproma, nblks_c, lsq_dim_c) stencil block indices.
/// @param lsq_qtmat_c      (nproma, lsq_dim_unk, lsq_dim_c, nblks_c).
/// @param lsq_rmat_rdiag_c (nproma, lsq_dim_unk, nblks_c); multiplied, not
///                         divided, in the back substitution.
/// @param lsq_rmat_utri_c  (nproma, (lsq_dim_unk^2 - lsq_dim_unk)/2, nblks_c).
/// @param lsq_moments      (nproma, nblks_c, lsq_dim_unk).
/// @param p_coeff          (lsq_dim_unk + 1, nproma, nlev, nblks_c) output.
/// @param patch_id, l_limited_area  if patch_id > 0 or l_limited_area is set,
///        p_coeff is zeroed over the requested index box before the update.
/// @param lacc             not used by this implementation.
///
/// The stencil size (9) and number of unknowns (9) are hard-coded.
/// The function fences before returning.
template <typename T>
void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c,
                      const T *lsq_qtmat_c, const T *lsq_rmat_rdiag_c,
                      const T *lsq_rmat_utri_c, const T *lsq_moments,
                      T *p_coeff, int i_startblk, int i_endblk,
                      int i_startidx_in, int i_endidx_in, int slev, int elev,
                      int nproma, int patch_id, bool l_limited_area, bool lacc,
                      int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) {
  // Wrap raw pointers in unmanaged Kokkos Views.
  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
      UnmanagedConstT3D;
  typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
                       Kokkos::MemoryUnmanaged>
      UnmanagedConstT4D;
  typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
      UnmanagedT4D;
  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
                       Kokkos::MemoryUnmanaged>
      UnmanagedConstInt3D;

  // Scratch storage with one slot per (stencil cell, jc, jk).
  Kokkos::View<T ***> z_d("z_d", lsq_dim_c, nproma, elev);

  UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c);
  UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c);

  UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c);
  UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c);

  UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk,
                                     lsq_dim_c, nblks_c);
  UnmanagedConstT3D ptr_rrdiag(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, nblks_c);
  UnmanagedConstT3D ptr_rutri(lsq_rmat_utri_c, nproma,
                              (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2,
                              nblks_c);
  UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk);

  if (patch_id > 0 || l_limited_area) {
    Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy(
        {0, i_startidx_in, slev, i_startblk},
        {lsq_dim_unk + 1, i_endidx_in, elev, i_endblk});
    Kokkos::parallel_for(
        "recon_lsq_cell_c_init", initPolicy,
        KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) {
          p_coeff_view(ji, jc, jk, jb) = 0;
        });
  }

  for (int jb = i_startblk; jb < i_endblk; ++jb) {
    int i_startidx, i_endidx;
    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
                      i_endblk, i_startidx, i_endidx);

    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
                                                       {elev, i_endidx});

    // Step 1: differences between the stencil cells and the cell itself.
    Kokkos::parallel_for(
        "recon_lsq_cell_c_step1", innerPolicy,
        KOKKOS_LAMBDA(const int jk, const int jc) {
          constexpr int n_stencil = 9;
          for (int js = 0; js < n_stencil; ++js) {
            z_d(js, jc, jk) =
                p_cc_view(iidx(jc, jb, js), jk, iblk(jc, jb, js)) -
                p_cc_view(jc, jk, jb);
          }
        });

    // Step 2: Q^T * d followed by back substitution.
    Kokkos::parallel_for(
        "recon_lsq_cell_c_step2", innerPolicy,
        KOKKOS_LAMBDA(const int jk, const int jc) {
          constexpr int n_stencil = 9;
          constexpr int n_unk = 9;

          // Per-iteration scratch. This must NOT be a View shared between
          // iterations: every (jk, jc) pair writes and then reads all nine
          // entries, so a shared buffer is a data race under parallel
          // execution.
          T z_qt_times_d[n_unk];

          for (int ju = 0; ju < n_unk; ++ju) {
            // Left-to-right accumulation starting from the first product.
            T acc = lsq_qtmat_c_view(jc, ju, 0, jb) * z_d(0, jc, jk);
            for (int js = 1; js < n_stencil; ++js) {
              acc += lsq_qtmat_c_view(jc, ju, js, jb) * z_d(js, jc, jk);
            }
            z_qt_times_d[ju] = acc;
          }

          // Last unknown has no off-diagonal contribution.
          p_coeff_view(n_unk, jc, jk, jb) =
              ptr_rrdiag(jc, n_unk - 1, jb) * z_qt_times_d[n_unk - 1];

          // Remaining unknowns, from the second-to-last row upwards. Row ju
          // uses n_terms consecutive entries of the packed triangle starting
          // at `first`, multiplied by p_coeff(ju+2 .. n_unk).
          for (int ju = n_unk - 2; ju >= 0; --ju) {
            const int n_terms = n_unk - 1 - ju;
            const int first = n_terms * (n_terms - 1) / 2;

            T acc = ptr_rutri(jc, first, jb) *
                    p_coeff_view(ju + 2, jc, jk, jb);
            for (int m = 1; m < n_terms; ++m) {
              acc += ptr_rutri(jc, first + m, jb) *
                     p_coeff_view(ju + 2 + m, jc, jk, jb);
            }
            p_coeff_view(ju + 1, jc, jk, jb) =
                ptr_rrdiag(jc, ju, jb) * (z_qt_times_d[ju] - acc);
          }

          // Constant coefficient: sum the products first, then subtract.
          T moment_sum =
              p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0);
          for (int ju = 1; ju < n_unk; ++ju) {
            moment_sum += p_coeff_view(ju + 1, jc, jk, jb) *
                          lsq_moments_view(jc, jb, ju);
          }
          p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb) - moment_sum;
        });
  }

  // Kernels may run asynchronously; wait so the caller can read p_coeff.
  Kokkos::fence();
}

ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_C);

/// @brief Computes the ten coefficients p_coeff(0..9, jc, jk, jb) from nine
///        stencil differences using the precomputed matrix lsq_pseudoinv.
///
/// For every block jb in [i_startblk, i_endblk), level jk in [slev, elev) and
/// cell jc in the range returned by get_indices_c_lib:
///   z_b(js)       = p_cc(iidx(jc,jb,js), jk, iblk(jc,jb,js)) - p_cc(jc,jk,jb)
///   p_coeff(ju+1) = sum_js lsq_pseudoinv(jc,ju,js,jb) * z_b(js),   ju = 0..8
///   p_coeff(0)    = p_cc(jc,jk,jb) - sum_ju p_coeff(ju+1) * lsq_moments(jc,jb,ju)
///
/// All pointers are interpreted as column-major (LayoutLeft) arrays:
/// @param p_cc          (nproma, nlev, nblks_c) input field.
/// @param lsq_idx_c     (nproma, nblks_c, lsq_dim_c) stencil cell indices.
/// @param lsq_blk_c     (nproma, nblks_c, lsq_dim_c) stencil block indices.
/// @param lsq_pseudoinv (nproma, lsq_dim_unk, lsq_dim_c, nblks_c) matrix.
/// @param lsq_moments   (nproma, nblks_c, lsq_dim_unk) moments.
/// @param p_coeff       (lsq_dim_unk + 1, nproma, nlev, nblks_c) output.
/// @param patch_id, l_limited_area  if patch_id > 0 or l_limited_area is set,
///        p_coeff is zeroed, block by block, over the same (jc, jk) range that
///        is subsequently updated.
/// @param lacc          not used by this implementation.
///
/// The stencil size (9) and number of unknowns (9) are hard-coded.
/// The function fences before returning.
template <typename T>
void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c,
                          const int *lsq_blk_c, const T *lsq_pseudoinv,
                          const T *lsq_moments, T *p_coeff, int i_startblk,
                          int i_endblk, int i_startidx_in, int i_endidx_in,
                          int slev, int elev, int nproma, int patch_id,
                          bool l_limited_area, bool lacc, int nblks_c, int nlev,
                          int lsq_dim_unk, int lsq_dim_c) {
  // Wrap raw pointers in unmanaged Kokkos Views.
  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
      UnmanagedConstT3D;
  typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
                       Kokkos::MemoryUnmanaged>
      UnmanagedConstT4D;
  typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
      UnmanagedT4D;
  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
                       Kokkos::MemoryUnmanaged>
      UnmanagedConstInt3D;

  UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c);
  UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c);

  UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c);
  UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c);

  UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk,
                                       lsq_dim_c, nblks_c);
  UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk);

  if (patch_id > 0 || l_limited_area) {
    for (int jb = i_startblk; jb < i_endblk; ++jb) {
      int i_startidx, i_endidx;
      get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
                        i_endblk, i_startidx, i_endidx);

      Kokkos::MDRangePolicy<Kokkos::Rank<3>> initPolicy(
          {slev, i_startidx, 0}, {elev, i_endidx, lsq_dim_unk + 1});
      Kokkos::parallel_for(
          "recon_lsq_cell_c_svd_init", initPolicy,
          KOKKOS_LAMBDA(const int jk, const int jc, const int ji) {
            p_coeff_view(ji, jc, jk, jb) = 0;
          });
    }
  }

  for (int jb = i_startblk; jb < i_endblk; ++jb) {
    int i_startidx, i_endidx;
    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
                      i_endblk, i_startidx, i_endidx);

    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
                                                       {elev, i_endidx});
    Kokkos::parallel_for(
        "recon_lsq_cell_c_svd_inner", innerPolicy,
        KOKKOS_LAMBDA(const int jk, const int jc) {
          constexpr int n_stencil = 9;
          constexpr int n_unk = 9;

          // Per-iteration scratch. This must NOT be a View shared between
          // iterations: every (jk, jc) pair writes and then reads all nine
          // entries, so a shared buffer is a data race under parallel
          // execution.
          T z_b[n_stencil];
          for (int js = 0; js < n_stencil; ++js) {
            z_b[js] = p_cc_view(iidx(jc, jb, js), jk, iblk(jc, jb, js)) -
                      p_cc_view(jc, jk, jb);
          }

          for (int ju = n_unk - 1; ju >= 0; --ju) {
            // Left-to-right accumulation starting from the first product.
            T acc = lsq_pseudoinv_view(jc, ju, 0, jb) * z_b[0];
            for (int js = 1; js < n_stencil; ++js) {
              acc += lsq_pseudoinv_view(jc, ju, js, jb) * z_b[js];
            }
            p_coeff_view(ju + 1, jc, jk, jb) = acc;
          }

          // Successive subtraction, in ascending coefficient order.
          T c0 = p_cc_view(jc, jk, jb);
          for (int ju = 0; ju < n_unk; ++ju) {
            c0 -= p_coeff_view(ju + 1, jc, jk, jb) *
                  lsq_moments_view(jc, jb, ju);
          }
          p_coeff_view(0, jc, jk, jb) = c0;
        });
  }

  // Kernels may run asynchronously; wait so the caller can read p_coeff.
  Kokkos::fence();
}

ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD);

/// @brief For each cell, combines the values of vec_e at three indexed
///        locations with the three weights in geofac_div:
///   div_vec_c(jc,jk,jb) = sum_{e=0..2} vec_e(iidx(jc,jb,e), jk, iblk(jc,jb,e))
///                                      * geofac_div(jc, e, jb)
///
/// Evaluated for blocks jb in [i_startblk, i_endblk), levels jk in
/// [slev, elev) and cells jc in the range returned by get_indices_c_lib.
///
/// All pointers are interpreted as column-major (LayoutLeft) arrays:
/// @param vec_e         (nproma, nlev, nblks_e) input.
/// @param cell_edge_idx (nproma, nblks_c, 3) first index into vec_e.
/// @param cell_edge_blk (nproma, nblks_c, 3) block index into vec_e.
/// @param geofac_div    (nproma, 3, nblks_c) weights.
/// @param div_vec_c     (nproma, nlev, nblks_c) output.
/// @param lacc          not used by this implementation.
///
/// The function fences before returning.
template <typename T>
void div3d(const T *vec_e, const int *cell_edge_idx, const int *cell_edge_blk,
           const T *geofac_div, T *div_vec_c, int i_startblk, int i_endblk,
           int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma,
           bool lacc, int nlev, int nblks_c, int nblks_e) {
  // Wrap raw pointers in unmanaged Kokkos Views.
  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
      UnmanagedConstT3D;
  typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
      UnmanagedT3D;
  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
                       Kokkos::MemoryUnmanaged>
      UnmanagedConstInt3D;

  UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e);

  UnmanagedConstInt3D iidx(cell_edge_idx, nproma, nblks_c, 3);
  UnmanagedConstInt3D iblk(cell_edge_blk, nproma, nblks_c, 3);

  UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c);
  UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c);

  for (int jb = i_startblk; jb < i_endblk; ++jb) {
    int i_startidx, i_endidx;
    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
                      i_endblk, i_startidx, i_endidx);

    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
                                                       {elev, i_endidx});
    Kokkos::parallel_for(
        "div3d_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) {
          div_vec_c_view(jc, jk, jb) =
              vec_e_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) *
                  geofac_div_view(jc, 0, jb) +
              vec_e_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) *
                  geofac_div_view(jc, 1, jb) +
              vec_e_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) *
                  geofac_div_view(jc, 2, jb);
        });
  }

  // Kernels may run asynchronously and the result is handed back through a
  // raw pointer, so wait for completion before returning.
  Kokkos::fence();
}

ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV3D);

/// @brief Same weighted three-term sum as div3d, applied to two input fields
///        in a single kernel:
///   div_vec_c(jc,jk,jb) = sum_e vec_e(iidx, jk, iblk) * geofac_div(jc, e, jb)
///   out2(jc,jk,jb)      = sum_e in2(iidx, jk, iblk)   * geofac_div(jc, e, jb)
///
/// Evaluated for blocks jb in [i_startblk, i_endblk), levels jk in
/// [slev, elev) and cells jc in the range returned by get_indices_c_lib.
///
/// All pointers are interpreted as column-major (LayoutLeft) arrays:
/// @param vec_e, in2        (nproma, nlev, nblks_e) inputs.
/// @param cell_edge_idx     (nproma, nblks_c, 3) first index into the inputs.
/// @param cell_edge_blk     (nproma, nblks_c, 3) block index into the inputs.
/// @param geofac_div        (nproma, 3, nblks_c) weights.
/// @param div_vec_c, out2   (nproma, nlev, nblks_c) outputs.
/// @param lacc              not used by this implementation.
///
/// The function fences before returning.
template <typename T>
void div3d_2field(const T *vec_e, const int *cell_edge_idx,
                  const int *cell_edge_blk, const T *geofac_div, T *div_vec_c,
                  const T *in2, T *out2, int i_startblk, int i_endblk,
                  int i_startidx_in, int i_endidx_in, int slev, int elev,
                  int nproma, bool lacc, int nlev, int nblks_c, int nblks_e) {
  // Wrap raw pointers in unmanaged Kokkos Views.
  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
      UnmanagedConstT3D;
  typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
      UnmanagedT3D;
  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
                       Kokkos::MemoryUnmanaged>
      UnmanagedConstInt3D;

  UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e);

  UnmanagedConstInt3D iidx(cell_edge_idx, nproma, nblks_c, 3);
  UnmanagedConstInt3D iblk(cell_edge_blk, nproma, nblks_c, 3);

  UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c);
  UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c);

  UnmanagedConstT3D in2_view(in2, nproma, nlev, nblks_e);
  UnmanagedT3D out2_view(out2, nproma, nlev, nblks_c);

  for (int jb = i_startblk; jb < i_endblk; ++jb) {
    int i_startidx, i_endidx;
    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
                      i_endblk, i_startidx, i_endidx);

    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
                                                       {elev, i_endidx});
    Kokkos::parallel_for(
        "div3d_2field_inner", innerPolicy,
        KOKKOS_LAMBDA(const int jk, const int jc) {
          div_vec_c_view(jc, jk, jb) =
              vec_e_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) *
                  geofac_div_view(jc, 0, jb) +
              vec_e_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) *
                  geofac_div_view(jc, 1, jb) +
              vec_e_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) *
                  geofac_div_view(jc, 2, jb);

          out2_view(jc, jk, jb) =
              in2_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) *
                  geofac_div_view(jc, 0, jb) +
              in2_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) *
                  geofac_div_view(jc, 1, jb) +
              in2_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) *
                  geofac_div_view(jc, 2, jb);
        });
  }

  // Kernels may run asynchronously and the results are handed back through
  // raw pointers, so wait for completion before returning.
  Kokkos::fence();
}

ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV3D_2FIELD);

/// @brief Applies the weighted three-term sum of div3d to each of dim4d
///        fields stacked along the fourth array dimension:
///   f4dout(jc,jk,jb,ji) = sum_{e=0..2} f4din(iidx(jc,jb,e), jk, iblk(jc,jb,e), ji)
///                                      * geofac_div(jc, e, jb)
///
/// Evaluated for blocks jb in [i_startblk, i_endblk), fields ji in
/// [0, dim4d), levels jk in [slev[ji], elev[ji]) and cells jc in the range
/// returned by get_indices_c_lib.
///
/// All pointers except slev/elev are interpreted as column-major (LayoutLeft)
/// arrays:
/// @param cell_edge_idx (nproma, nblks_c, 3) first index into f4din.
/// @param cell_edge_blk (nproma, nblks_c, 3) block index into f4din.
/// @param geofac_div    (nproma, 3, nblks_c) weights.
/// @param f4din         (nproma, nlev, nblks_e, dim4d) input.
/// @param f4dout        (nproma, nlev, nblks_c, dim4d) output.
/// @param slev, elev    per-field level bounds; read on the host, so they must
///                      point to host-accessible memory with dim4d entries.
/// @param lacc          not used by this implementation.
///
/// The function fences before returning.
template <typename T>
void div4d(const int *cell_edge_idx, const int *cell_edge_blk,
           const T *geofac_div, const T *f4din, T *f4dout, int dim4d,
           int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
           const int *slev, const int *elev, int nproma, bool lacc, int nlev,
           int nblks_c, int nblks_e) {
  // Wrap raw pointers in unmanaged Kokkos Views.
  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
      UnmanagedConstT3D;
  typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
                       Kokkos::MemoryUnmanaged>
      UnmanagedConstT4D;
  typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
      UnmanagedT3D;
  typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
      UnmanagedT4D;
  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
                       Kokkos::MemoryUnmanaged>
      UnmanagedConstInt3D;

  UnmanagedConstInt3D iidx(cell_edge_idx, nproma, nblks_c, 3);
  UnmanagedConstInt3D iblk(cell_edge_blk, nproma, nblks_c, 3);

  UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c);

  UnmanagedConstT4D f4din_view(f4din, nproma, nlev, nblks_e, dim4d);
  UnmanagedT4D f4dout_view(f4dout, nproma, nlev, nblks_c, dim4d);

  for (int jb = i_startblk; jb < i_endblk; ++jb) {
    int i_startidx, i_endidx;
    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
                      i_endblk, i_startidx, i_endidx);

    for (int ji = 0; ji < dim4d; ++ji) {
      // Each field has its own vertical range, hence one launch per field.
      Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
          {slev[ji], i_startidx}, {elev[ji], i_endidx});
      Kokkos::parallel_for(
          "div4d_inner", innerPolicy,
          KOKKOS_LAMBDA(const int jk, const int jc) {
            f4dout_view(jc, jk, jb, ji) =
                f4din_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0), ji) *
                    geofac_div_view(jc, 0, jb) +
                f4din_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1), ji) *
                    geofac_div_view(jc, 1, jb) +
                f4din_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2), ji) *
                    geofac_div_view(jc, 2, jb);
          });
    }
  }

  // Kernels may run asynchronously and the result is handed back through a
  // raw pointer, so wait for completion before returning.
  Kokkos::fence();
}

ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV4D);

+template <typename T> +void div_avg(const T *vec_e, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const int *cell_edge_idx, + const int *cell_edge_blk, const T *geofac_div, const T 
*avg_coeff, + T *div_vec_c, const T *opt_in2, T *opt_out2, + const int *i_startblk_in, const int *i_endblk_in, + const int *i_startidx_in, const int *i_endidx_in, int slev, + int elev, int nproma, int patch_id, bool l_limited_area, + bool l2fields, bool lacc, int nlev, int nblks_c, int nblks_e) { + // Wrap raw pointers in unmanaged Kokkos Views (LayoutLeft); the extents set the strides used for indexing. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); + + UnmanagedConstInt3D inidx(cell_neighbor_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D inblk(cell_neighbor_blk, nproma, nblks_c, 3); + UnmanagedConstInt3D ieidx(cell_edge_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D ieblk(cell_edge_blk, nproma, nblks_c, 3); + + UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c); // read below as (jc, 0..2, jb) with jb a cell block + UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma, 4, nblks_c); // read below as (jc, 0..3, jb) with jb a cell block + + UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c); + + UnmanagedConstT3D opt_in2_view(opt_in2, nproma, nlev, nblks_e); + UnmanagedT3D opt_out2_view(opt_out2, nproma, nlev, nblks_c); + + Kokkos::View<T ***> aux_c("aux_c", nproma, nlev, nblks_c); + Kokkos::View<T ***> aux_c2("aux_c2", nproma, nlev, nblks_c); + + int i_startblk = i_startblk_in[0]; + int i_endblk = i_endblk_in[0]; + + if (l2fields) { + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step1", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + aux_c(jc, jk, jb) = + vec_e_view(ieidx(jc, jb, 0), jk, ieblk(jc, jb,
0)) * + geofac_div_view(jc, 0, jb) + + vec_e_view(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) * + geofac_div_view(jc, 1, jb) + + vec_e_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) * + geofac_div_view(jc, 2, jb); + aux_c2(jc, jk, jb) = + opt_in2_view(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) * + geofac_div_view(jc, 0, jb) + + opt_in2_view(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) * + geofac_div_view(jc, 1, jb) + + opt_in2_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) * + geofac_div_view(jc, 2, jb); + }); + } + } else { + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step2", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + aux_c(jc, jk, jb) = + vec_e_view(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) * + geofac_div_view(jc, 0, jb) + + vec_e_view(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) * + geofac_div_view(jc, 1, jb) + + vec_e_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) * + geofac_div_view(jc, 2, jb); + }); + } + } + + if (patch_id > 0 || l_limited_area) { + i_startblk = i_startblk_in[1]; + i_endblk = i_endblk_in[1]; + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step3", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + div_vec_c_view(jc, jk, jb) = aux_c(jc, jk, jb); + }); + } + + if (l2fields) { + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + 
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step4", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + opt_out2_view(jc, jk, jb) = aux_c2(jc, jk, jb); + }); + } + } + } + + i_startblk = i_startblk_in[2]; + i_endblk = i_endblk_in[2]; + + if (l2fields) { + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step5", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + div_vec_c_view(jc, jk, jb) = + aux_c(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + + aux_c(inidx(jc, jb, 0), jk, inblk(jc, jb, 0)) * + avg_coeff_view(jc, 1, jb) + + aux_c(inidx(jc, jb, 1), jk, inblk(jc, jb, 1)) * + avg_coeff_view(jc, 2, jb) + + aux_c(inidx(jc, jb, 2), jk, inblk(jc, jb, 2)) * + avg_coeff_view(jc, 3, jb); + opt_out2_view(jc, jk, jb) = + aux_c2(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + + aux_c2(inidx(jc, jb, 0), jk, inblk(jc, jb, 0)) * + avg_coeff_view(jc, 1, jb) + + aux_c2(inidx(jc, jb, 1), jk, inblk(jc, jb, 1)) * + avg_coeff_view(jc, 2, jb) + + aux_c2(inidx(jc, jb, 2), jk, inblk(jc, jb, 2)) * + avg_coeff_view(jc, 3, jb); + }); + } + } else { + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step6", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + div_vec_c_view(jc, jk, jb) = + aux_c(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + + aux_c(inidx(jc, jb, 0), jk, inblk(jc, jb, 0)) * + avg_coeff_view(jc, 1, jb) + + aux_c(inidx(jc, jb, 1), jk, inblk(jc, jb, 1)) 
* + avg_coeff_view(jc, 2, jb) + + aux_c(inidx(jc, jb, 2), jk, inblk(jc, jb, 2)) * + avg_coeff_view(jc, 3, jb); + }); + } + } +} + +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV_AVG); + +template <typename T> +void rot_vertex_atmos(const T *vec_e, const int *vert_edge_idx, + const int *vert_edge_blk, const T *geofac_rot, T *rot_vec, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + bool lacc, int nlev, int nblks_e, int nblks_v) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); + + UnmanagedConstInt3D iidx(vert_edge_idx, nproma, nblks_v, 6); + UnmanagedConstInt3D iblk(vert_edge_blk, nproma, nblks_v, 6); + + UnmanagedConstT3D geofac_rot_view(geofac_rot, nproma, 6, nblks_v); + + UnmanagedT3D rot_vec_view(rot_vec, nproma, nlev, nblks_v); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "rot_vertex_atmos_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jv) { + rot_vec_view(jv, jk, jb) = + vec_e_view(iidx(jv, jb, 0), jk, iblk(jv, jb, 0)) * + geofac_rot_view(jv, 0, jb) + + vec_e_view(iidx(jv, jb, 1), jk, iblk(jv, jb, 1)) * + geofac_rot_view(jv, 1, jb) + + vec_e_view(iidx(jv, jb, 2), jk, iblk(jv, jb, 2)) * + geofac_rot_view(jv, 2, jb) + + vec_e_view(iidx(jv, jb, 3), jk, iblk(jv, jb, 3)) * + geofac_rot_view(jv, 3, jb) + + vec_e_view(iidx(jv, jb, 4), jk, iblk(jv, jb, 4)) * + 
geofac_rot_view(jv, 4, jb) + + vec_e_view(iidx(jv, jb, 5), jk, iblk(jv, jb, 5)) * + geofac_rot_view(jv, 5, jb); + }); + } +} + +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_ROT_VERTEX_ATMOS); + +template <typename T> +void rot_vertex_ri(const T *vec_e, const int *vert_edge_idx, + const int *vert_edge_blk, const T *geofac_rot, T *rot_vec, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, bool lacc, + bool acc_async, int nlev, int nblks_e, int nblks_v) { + // Wrap raw pointers in unmanaged Kokkos Views (LayoutLeft); the extents set the strides used for indexing. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); + + UnmanagedConstInt3D iidx(vert_edge_idx, nproma, nblks_v, 6); + UnmanagedConstInt3D iblk(vert_edge_blk, nproma, nblks_v, 6); + + UnmanagedConstT3D geofac_rot_view(geofac_rot, nproma, 6, nblks_v); + + UnmanagedT3D rot_vec_view(rot_vec, nproma, nlev, nblks_v); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "rot_vertex_ri_inner", innerPolicy, // label names this function's own kernel + KOKKOS_LAMBDA(const int jk, const int jv) { + rot_vec_view(jv, jk, jb) = + vec_e_view(iidx(jv, jb, 0), jk, iblk(jv, jb, 0)) * + geofac_rot_view(jv, 0, jb) + + vec_e_view(iidx(jv, jb, 1), jk, iblk(jv, jb, 1)) * + geofac_rot_view(jv, 1, jb) + + vec_e_view(iidx(jv, jb, 2), jk, iblk(jv, jb, 2)) * + geofac_rot_view(jv, 2, jb) + + vec_e_view(iidx(jv, jb, 3), jk, iblk(jv, jb, 3)) * + geofac_rot_view(jv, 3, jb) + + vec_e_view(iidx(jv, jb, 4), jk,
iblk(jv, jb, 4)) * + geofac_rot_view(jv, 4, jb) + + vec_e_view(iidx(jv, jb, 5), jk, iblk(jv, jb, 5)) * + geofac_rot_view(jv, 5, jb); + }); + } + + if (!acc_async) + Kokkos::fence(); +} + +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_ROT_VERTEX_RI); diff --git a/src/horizontal/mo_lib_divrot.hpp b/src/horizontal/mo_lib_divrot.hpp new file mode 100644 index 0000000..b8e9743 --- /dev/null +++ b/src/horizontal/mo_lib_divrot.hpp @@ -0,0 +1,130 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#pragma once + +#include <Kokkos_Core.hpp> +#include <types.hpp> + +#define ICONMATH_DECLARE_RECON_LSQ_CELL_L(_type) \ + void recon_lsq_cell_l( \ + const _type *p_cc, const int *cell_neighbor_idx, \ + const int *cell_neighbor_blk, const _type *lsq_qtmat_c, \ + const _type *lsq_rmat_rdiag_c, const _type *lsq_rmat_utri_c, \ + const _type *lsq_moments, _type *p_coeff, int i_startblk, int i_endblk, \ + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, \ + bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, \ + int lsq_dim_unk, int lsq_dim_c) + +#define ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD(_type) \ + void recon_lsq_cell_l_svd( \ + const _type *p_cc, const int *cell_neighbor_idx, \ + const int *cell_neighbor_blk, const _type *lsq_pseudoinv, \ + const _type *lsq_moments, _type *p_coeff, int i_startblk, int i_endblk, \ + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, \ + bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, \ + int lsq_dim_unk, int lsq_dim_c) + +#define ICONMATH_DECLARE_RECON_LSQ_CELL_Q(_type) \ + void recon_lsq_cell_q( \ + const _type *p_cc, const int *lsq_idx_c, 
const int *lsq_blk_c, \ + const _type *lsq_qtmat_c, const _type *lsq_rmat_rdiag_c, \ + const _type *lsq_rmat_utri_c, const _type *lsq_moments, _type *p_coeff, \ + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ + int slev, int elev, int nproma, int patch_id, bool l_limited_area, \ + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) + +#define ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD(_type) \ + void recon_lsq_cell_q_svd( \ + const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ + const _type *lsq_pseudoinv, const _type *lsq_moments, _type *p_coeff, \ + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ + int slev, int elev, int nproma, int patch_id, bool l_limited_area, \ + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) + +#define ICONMATH_DECLARE_RECON_LSQ_CELL_C(_type) \ + void recon_lsq_cell_c( \ + const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ + const _type *lsq_qtmat_c, const _type *lsq_rmat_rdiag_c, \ + const _type *lsq_rmat_utri_c, const _type *lsq_moments, _type *p_coeff, \ + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ + int slev, int elev, int nproma, int patch_id, bool l_limited_area, \ + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) + +#define ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD(_type) \ + void recon_lsq_cell_c_svd( \ + const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ + const _type *lsq_pseudoinv, const _type *lsq_moments, _type *p_coeff, \ + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ + int slev, int elev, int nproma, int patch_id, \ + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, \ + int lsq_dim_c) + +#define ICONMATH_DECLARE_DIV3D(_type) \ + void div3d(const _type *vec_e, const int *cell_edge_idx, \ + const int *cell_edge_blk, const _type *geofac_div, \ + _type *div_vec_c, int i_startblk, int i_endblk, \ + int i_startidx_in, int i_endidx_in, int 
slev, int elev, \ + int nproma, bool lacc, int nlev, int nblks_c, int nblks_e) + +#define ICONMATH_DECLARE_DIV3D_2FIELD(_type) \ + void div3d_2field(const _type *vec_e, const int *cell_edge_idx, \ + const int *cell_edge_blk, const _type *geofac_div, \ + _type *div_vec_c, const _type *in2, _type *out2, \ + int i_startblk, int i_endblk, int i_startidx_in, \ + int i_endidx_in, int slev, int elev, int nproma, \ + bool lacc, int nlev, int nblks_c, int nblks_e) + +#define ICONMATH_DECLARE_DIV4D(_type) \ + void div4d(const int *cell_edge_idx, const int *cell_edge_blk, \ + const _type *geofac_div, const _type *f4din, _type *f4dout, \ + int dim4d, int i_startblk, int i_endblk, int i_startidx_in, \ + int i_endidx_in, const int *slev, const int *elev, int nproma, \ + bool lacc, int nlev, int nblks_c, int nblks_e) + +#define ICONMATH_DECLARE_DIV_AVG(_type) \ + void div_avg(const _type *vec_e, const int *cell_neighbor_idx, \ + const int *cell_neighbor_blk, const int *cell_edge_idx, \ + const int *cell_edge_blk, const _type *geofac_div, \ + const _type *avg_coeff, _type *div_vec_c, const _type *opt_in2, \ + _type *opt_out2, const int *i_startblk_in, \ + const int *i_endblk_in, const int *i_startidx_in, \ + const int *i_endidx_in, int slev, int elev, int nproma, \ + int patch_id, bool l_limited_area, bool l2fields, bool lacc, \ + int nlev, int nblks_c, int nblks_e) + +#define ICONMATH_DECLARE_ROT_VERTEX_ATMOS(_type) \ + void rot_vertex_atmos( \ + const _type *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, \ + const _type *geofac_rot, _type *rot_vec, int i_startblk, int i_endblk, \ + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, \ + bool lacc, int nlev, int nblks_e, int nblks_v) + +#define ICONMATH_DECLARE_ROT_VERTEX_RI(_type) \ + void rot_vertex_ri( \ + const _type *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, \ + const _type *geofac_rot, _type *rot_vec, int i_startblk, int i_endblk, \ + int i_startidx_in, int i_endidx_in, int slev, 
int elev, int nproma, \ + bool lacc, bool acc_async, int nlev, int nblks_e, int nblks_v) + +// Declare as templates +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_L(T); +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD(T); +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_Q(T); +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD(T); +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_C(T); +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD(T); +template <typename T> ICONMATH_DECLARE_DIV3D(T); +template <typename T> ICONMATH_DECLARE_DIV3D_2FIELD(T); +template <typename T> ICONMATH_DECLARE_DIV4D(T); +template <typename T> ICONMATH_DECLARE_DIV_AVG(T); +template <typename T> ICONMATH_DECLARE_ROT_VERTEX_ATMOS(T); +template <typename T> ICONMATH_DECLARE_ROT_VERTEX_RI(T); diff --git a/src/types.hpp b/src/types.hpp new file mode 100644 index 0000000..7192e18 --- /dev/null +++ b/src/types.hpp @@ -0,0 +1,16 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#pragma once + +#define ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(_macro) \ + template _macro(float); \ + template _macro(double) diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index c9320cb..98a21b2 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -27,6 +27,9 @@ endif() set(SOURCES main.cpp + test_horizontal_div.cpp + test_horizontal_recon.cpp + test_horizontal_rot.cpp test_tdma_solver.cpp test_interpolation_vector.cpp test_intp_rbf.cpp @@ -35,11 +38,14 @@ set(SOURCES # Create the test executable from your test files, including main.cpp. 
add_executable(iconmath_test_c ${SOURCES}) +target_include_directories(iconmath_test_c PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) + # Link the test executable with GoogleTest and Kokkos. target_link_libraries(iconmath_test_c PUBLIC iconmath-support iconmath-interpolation + iconmath-horizontal PRIVATE gtest_main Kokkos::kokkos diff --git a/test/c/dim_helper.hpp b/test/c/dim_helper.hpp new file mode 100644 index 0000000..165d5d9 --- /dev/null +++ b/test/c/dim_helper.hpp @@ -0,0 +1,88 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#pragma once + +// Template function for computing array size. +// For example, we get the array size of a 4-dimensional array A(2, 3, 4, 5) by +// dim_combine(2, 3, 4, 5). +// Which will automatically instantiate +// dim_combine<int, int, int, int>(2, 3, 4, 5). +// The function then call dim_combine recursively +// dim_combine<int, int, int, int>(2, 3, 4, 5) { +// return static_cast<size_t>(2) * dim_combine<int, int, int>(3, 4, 5); +// } +// dim_combine<int, int, int>(3, 4, 5) { +// return static_cast<size_t>(3) * dim_combine<int, int>(4, 5); +// } +// dim_combine<int, int>(4, 5) { +// return static_cast<size_t>(4) * dim_combine<int>(5); +// } +// Where the last dim_combine is specialized as +// dim_combine<int>(5) { +// return static_cast<size_t>(5); +// } +// Which gives +// dim_combine<int, int, int, int>(2, 3, 4, 5) = +// static_cast<size_t>(2) * static_cast<size_t>(3) * +// static_cast<size_t>(4) * static_cast<size_t>(5) +/// Template helpers for combining multiple dimension array sizes. +/// The base function of dimension combine. Should not be used. +template <typename... Ts> size_t dim_combine(Ts... 
dims) { return 0; } +/// Template specialization of only one dimension, returns the dimension itself. +template <typename T> size_t dim_combine(T dim) { + return static_cast<size_t>(dim); +} +/// Template specialization of picking out the first dimension. The combined +/// dimension is the first dimension times the combined dimension of the rest. +template <typename T, typename... Ts> size_t dim_combine(T dim, Ts... dims) { + return static_cast<size_t>(dim) * dim_combine(dims...); +} + +// Template function for LayoutLeft ID access in compile time. +// For example, a multi-dimensional array A of dimensions <2, 3, 4, 5> gets its +// corresponding vector id (LayoutLeft) by +// at<2, 3, 4, 5>(id1, id2, id3, id4). +// The at_impl then adds the id from beginning to the end and pass the id prefix +// to the next recursive at_impl function. In this example, +// at<2, 3, 4, 5>(id1, id2, id3, id4) { +// return id1 + at_impl<3, 4, 5>(2, id2, id3, id4); +// } +// at_impl<3, 4, 5>(2, id2, id3, id4) { +// return id2 * 2 + at_impl<4, 5>(2 * 3, id3, id4); +// } +// at_impl<4, 5>(2 * 3, id3, id4) { +// return id3 * 2 * 3 + at_impl<5>(2 * 3 * 4, id4); +// } +// at_impl<5>(2 * 3 * 4, id4) { +// return id4 * 2 * 3 * 4; +// } +// Which gives +// at<2, 3, 4, 5>(id1, id2, id3, id4) = id1 + id2 * 2 + +// id3 * 2 * 3 + id4 * 2 * 3 * 4 +/// Helper type converting integer numbers to int +template <class T, auto> using always_t = T; +/// Base function of at_impl. Should not be used. +template <int... Dims> int at_impl(always_t<int, Dims>... ids) { return 0; } +/// Template specialization of the last ID +template <int LastDim> int at_impl(int prefix, int id) { return id * prefix; } +/// Template specialization of at_impl, accumulate the return value using the +/// first id and pass the prefix to the next recursive at_impl function. +template <int FirstDim, int... Dims> +int at_impl(int prefix, int id, always_t<int, Dims>... 
ids) { + return id * prefix + at_impl<Dims...>(prefix * FirstDim, ids...); +} +/// at<dim1, dim2, ...>(id1, id2, ...) gets its memory index in vector assuming +/// LayoutLeft. Use this function instead of at_impl. +template <int FirstDim, int... Dims> +int at(int id, always_t<int, Dims>... ids) { + return id + at_impl<Dims...>(FirstDim, ids...); +} diff --git a/test/c/test_horizontal_div.cpp b/test/c/test_horizontal_div.cpp new file mode 100644 index 0000000..596d19e --- /dev/null +++ b/test/c/test_horizontal_div.cpp @@ -0,0 +1,1070 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include <iostream> +#include <random> +#include <vector> + +#include <Kokkos_Core.hpp> +#include <gtest/gtest.h> +#include <dim_helper.hpp> +#include <horizontal/mo_lib_divrot.hpp> +#include <support/mo_lib_loopindices.hpp> + +/// Test class for the horizontal divergence tests. Templated for the ValueType +template <typename ValueType> class HorizontalDivTest : public ::testing::Test { +protected: + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 2; // number of vertical levels + static constexpr int nblks_c = 1; // number of cell blocks + static constexpr int nblks_e = 1; // number of edge blocks + static constexpr int dim4d = 2; // 4th dimension size + + int i_startblk = 0; + int i_endblk = nblks_c; // Test blocks [0 .. nblks_c-1] + int i_startidx_in = 0; + int i_endidx_in = nproma; // Full range: 0 .. nproma-1 + std::vector<int> slev; + std::vector<int> elev; + bool lacc = false; // Not using ACC-specific behavior. 
+ + std::vector<ValueType> vec_e; + std::vector<int> cell_edge_idx; + std::vector<int> cell_edge_blk; + std::vector<ValueType> geofac_div; + std::vector<ValueType> div_vec_c; + std::vector<ValueType> f4din; + std::vector<ValueType> f4dout; + + // Followings are needed in HorizontalDivAvgTest + std::vector<int> cell_neighbor_idx; + std::vector<int> cell_neighbor_blk; + std::vector<ValueType> avg_coeff; + std::vector<ValueType> opt_in2; + std::vector<ValueType> opt_out2; + + HorizontalDivTest() { + slev.resize(dim4d, 0); + elev.resize(dim4d, nlev); // Full vertical range (0 .. nlev-1) + + vec_e.resize(dim_combine(nproma, nlev, nblks_e)); + cell_edge_idx.resize(dim_combine(nproma, nblks_c, 3)); + cell_edge_blk.resize(dim_combine(nproma, nblks_c, 3)); + geofac_div.resize(dim_combine(nproma, 3, nblks_c)); + div_vec_c.resize(dim_combine(nproma, nlev, nblks_c)); + f4din.resize(dim_combine(nproma, nlev, nblks_e, dim4d)); + f4dout.resize(dim_combine(nproma, nlev, nblks_c, dim4d)); + cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, 3)); + cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, 3)); + avg_coeff.resize(dim_combine(nproma, 4, nblks_c)); + opt_in2.resize(dim_combine(nproma, nlev, nblks_e)); + opt_out2.resize(dim_combine(nproma, nlev, nblks_c)); + } +}; + +/// ValueTypes which the divrot tests should run with +typedef ::testing::Types<float, double> ValueTypes; + +TYPED_TEST_SUITE(HorizontalDivTest, ValueTypes); + +TYPED_TEST(HorizontalDivTest, TestDiv3DSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Initialization with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + 
this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + } + + // Set edge indices to point to specific cells (including self) + this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; + this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + + // All edges are in the same block for this test + for (int j = 0; j < 3; ++j) { + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + } + + // Geometric factors + this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; + this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; + this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + + // Initialize div_vec_c to zero + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; + } + } + + // Call the div3d function + div3d<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.1, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.2, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6); +} + +TYPED_TEST(HorizontalDivTest, TestDiv3DRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Set up random number generators + 
std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialization with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + // Initialize div_vec_c to random values + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + } + } + + // Call the div3d function + div3d<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values separately and verify results + std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + 
this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "Results differ at i=" << i << ", k=" << k; + } + } +} + +TYPED_TEST(HorizontalDivTest, TestDiv3D2FSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + const auto &f4d_at = at<nproma, nlev, nblks_e, dim4d>; + const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + + // Initialization with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + this->f4din[f4d_at(i, k, 0, 0)] = + (i + 1) * (k + 2); // Different pattern for second field + } + + // Set edge indices to point to specific cells (including self) + this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; + this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + + // All edges are in the same block for this test + for (int j = 0; j < 3; ++j) { + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + } + + // Geometric factors + this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; + this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; + this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + + // Initialize div_vec_c and 
f4dout to zero + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; + this->f4dout[f4dout_at(i, k, 0, 0)] = 0.0; + } + } + + // Call the div3d_2field function + div3d_2field<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->f4din.data(), + this->f4dout.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Check first field (same as in div3d test) + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.1, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.2, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6); + + // Check second field (expected values calculated manually) + EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 0)], 5.1, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 0)], 6.3, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 0)], 4.4, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 0)], 6.6, 1e-6); +} + +TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + const auto &f4d_at = at<nproma, nlev, nblks_e, dim4d>; + const auto 
&f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialization with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + this->f4din[f4d_at(i, k, 0, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + // Initialize div_vec_c and f4dout to random values + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + this->f4dout[f4dout_at(i, k, 0, 0)] = real_distrib(gen); + } + } + + // Call the div3d_2field function + div3d_2field<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->f4din.data(), + this->f4dout.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values separately and verify results + std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); + std::vector<TypeParam> ref_f4dout(nproma * nlev * nblks_c * dim4d, 0.0); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = 
i_startidx; jc < i_endidx; ++jc) { + // Calculate reference value for first field + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + + // Calculate reference value for second field + ref_f4dout[f4dout_at(jc, jk, jb, 0)] = + this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)], + 0)] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)], + 0)] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)], + 0)] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Verify results for first field + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "First field results differ at i=" << i << ", k=" << k; + } + } + + // Verify results for second field + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->f4dout[f4dout_at(i, k, 0, 0)], + ref_f4dout[f4dout_at(i, k, 0, 0)], 1e-5) + << "Second field results differ at i=" << i << ", k=" << k; + } + } +} + +TYPED_TEST(HorizontalDivTest, TestDiv4DSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + 
constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &f4din_at = at<nproma, nlev, nblks_e, dim4d>; + const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = (i + j) % nproma; + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + this->geofac_div[geofac_div_at(i, j, 0)] = 0.1 * (j + 1); + } + + for (int k = 0; k < nlev; ++k) { + for (int d = 0; d < dim4d; ++d) { + this->f4din[f4din_at(i, k, 0, d)] = 1.0 + i + k + d; + this->f4dout[f4dout_at(i, k, 0, d)] = 0.0; + } + } + } + + // Test function + div4d<TypeParam>(this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->geofac_div.data(), this->f4din.data(), + this->f4dout.data(), this->dim4d, this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev.data(), this->elev.data(), this->nproma, + this->lacc, this->nlev, this->nblks_c, this->nblks_e); + + EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 0)], 1.4, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 0)], 1.1, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 0)], 1.1, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 0)], 2.0, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 1)], 2.0, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 1)], 1.7, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 1)], 1.7, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 1)], 2.6, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 1)], 2.3, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 1)], 2.3, 1e-6); +} + +TYPED_TEST(HorizontalDivTest, TestDiv4DRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = 
this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &f4din_at = at<nproma, nlev, nblks_e, dim4d>; + const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); + + // Initialize with random values + for (int i = 0; i < nproma; ++i) { + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + for (int k = 0; k < nlev; ++k) { + for (int d = 0; d < dim4d; ++d) { + this->f4din[f4din_at(i, k, 0, d)] = real_distrib(gen); + this->f4dout[f4dout_at(i, k, 0, d)] = 0.0; + } + } + } + + // Test function + div4d<TypeParam>(this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->geofac_div.data(), this->f4din.data(), + this->f4dout.data(), this->dim4d, this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev.data(), this->elev.data(), this->nproma, + this->lacc, this->nlev, this->nblks_c, this->nblks_e); + + // Compute reference result and check + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int ji = 0; ji < dim4d; ++ji) { + for (int jk = this->slev[ji]; jk < this->elev[ji]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + TypeParam expected = 0.0; + for (int je = 0; je < 3; ++je) { + expected += + this->f4din[f4din_at( + this->cell_edge_idx[cell_edge_at(jc, jb, je)], jk, + this->cell_edge_blk[cell_edge_at(jc, 
jb, je)], ji)] * + this->geofac_div[geofac_div_at(jc, je, jb)]; + } + + EXPECT_NEAR(this->f4dout[f4dout_at(jc, jk, jb, ji)], expected, 1e-5) + << "Random test fails at jc=" << jc << ", jk=" << jk + << ", jb=" << jb << ", ji=" << ji; + } + } + } + } +} + +TYPED_TEST_SUITE(HorizontalDivTest, ValueTypes); + +TYPED_TEST(HorizontalDivTest, TestDivAvgSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Vectors for additional parameters + // Vectors for block and index ranges + std::vector<int> i_startblk_in(3, 0); + std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_startidx_in(3, 0); + std::vector<int> i_endidx_in(3, nproma); + + // Parameters for the test + int patch_id = 1; + bool l_limited_area = true; + bool l2fields = true; + + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + + // Initialize the vectors with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + this->opt_in2[vec_e_at(i, k, 0)] = + (i + 1) * (k + 1) * 0.5; // Half of vec_e + } + + // Set edge indices to point to specific cells + this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; + this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + + // Set neighbor indices similarly + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = (i + 2) % nproma; + + // All 
edges and neighbors are in the same block for this test + for (int j = 0; j < 3; ++j) { + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + // Geometric factors + this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; + this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; + this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + + // Average coefficients + this->avg_coeff[avg_coeff_at(i, 0, 0)] = 0.4; // Self + this->avg_coeff[avg_coeff_at(i, 1, 0)] = 0.2; // First neighbor + this->avg_coeff[avg_coeff_at(i, 2, 0)] = 0.2; // Second neighbor + this->avg_coeff[avg_coeff_at(i, 3, 0)] = 0.2; // Third neighbor + + // Initialize div_vec_c and opt_out2 to zero + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; + this->opt_out2[div_vec_c_at(i, k, 0)] = 0.0; + } + } + + // Call the div_avg function + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), + this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], + this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.76, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.04, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.08, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.08, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.16, 1e-6); + + EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 0, 0)], 0.94, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 1, 0)], 1.88, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 0, 0)], 1.02, 1e-6); + 
EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 2.04, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 1.04, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 2.08, 1e-6); +} + +TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Vectors for block and index ranges + std::vector<int> i_startblk_in(3, 0); + std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_startidx_in(3, 0); + std::vector<int> i_endidx_in(3, nproma); + + // Parameters for the test + int patch_id = 1; + bool l_limited_area = true; + bool l2fields = true; + + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialize with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + this->opt_in2[vec_e_at(i, k, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = + 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] 
= real_distrib(gen); + } + + // Random average coefficients + for (int j = 0; j < 4; ++j) { + this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen); + } + + // Random initial values for div_vec_c and opt_out2 + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + this->opt_out2[div_vec_c_at(i, k, 0)] = real_distrib(gen); + } + } + + // Call the div_avg function + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), + this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], + this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values manually + std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> aux_c2(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> ref_opt_out2(dim_combine(nproma, nlev, nblks_c)); + + // Step 1: Calculate aux_c and aux_c2 + for (int jb = i_startblk_in[0]; jb < i_endblk_in[0]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + aux_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + 
this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + + aux_c2[div_vec_c_at(jc, jk, jb)] = + this->opt_in2[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->opt_in2[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->opt_in2[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Step 2: Assign aux_c to div_vec_c and aux_c2 to opt_out2 for patch_id > 0 + for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + aux_c[div_vec_c_at(jc, jk, jb)]; + ref_opt_out2[div_vec_c_at(jc, jk, jb)] = + aux_c2[div_vec_c_at(jc, jk, jb)]; + } + } + } + + // Step 3: Perform averaging for the rest of the blocks + for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, + i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + aux_c[div_vec_c_at(jc, jk, jb)] * + this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + 
this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * + this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * + this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * + this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + + ref_opt_out2[div_vec_c_at(jc, jk, jb)] = + aux_c2[div_vec_c_at(jc, jk, jb)] * + this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + aux_c2[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * + this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + aux_c2[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * + this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + aux_c2[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * + this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "div_vec_c results differ at i=" << i << ", k=" << k; + + EXPECT_NEAR(this->opt_out2[div_vec_c_at(i, k, 0)], + ref_opt_out2[div_vec_c_at(i, k, 0)], 1e-5) + << "opt_out2 results differ at i=" << i << ", k=" << k; + } + } +} + +TYPED_TEST(HorizontalDivTest, TestDivAvgSpecificNoL2fields) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 
3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Vectors for block and index ranges + std::vector<int> i_startblk_in(3, 0); + std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_startidx_in(3, 0); + std::vector<int> i_endidx_in(3, nproma); + + // Parameters for the test + int patch_id = 1; + bool l_limited_area = true; + bool l2fields = false; + + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + + // Initialize the vectors with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + this->opt_in2[vec_e_at(i, k, 0)] = + (i + 1) * (k + 1) * 0.5; // Half of vec_e + } + + // Set edge indices to point to specific cells + this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; + this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + + // Set neighbor indices similarly + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = (i + 2) % nproma; + + // All edges and neighbors are in the same block for this test + for (int j = 0; j < 3; ++j) { + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + // Geometric factors + this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; + this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; + this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + + // Average coefficients + this->avg_coeff[avg_coeff_at(i, 0, 0)] = 0.4; // Self + this->avg_coeff[avg_coeff_at(i, 1, 0)] = 0.2; // First neighbor + this->avg_coeff[avg_coeff_at(i, 2, 0)] = 0.2; // Second neighbor + this->avg_coeff[avg_coeff_at(i, 3, 0)] = 0.2; // Third neighbor + + // Initialize div_vec_c and opt_out2 to zero + for (int k = 0; k < nlev; ++k) 
{ + this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; + this->opt_out2[div_vec_c_at(i, k, 0)] = 0.0; + } + } + + // Call the div_avg function + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), + this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], + this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.76, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.04, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.08, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.08, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.16, 1e-6); + + EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 0, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 1, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 0, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 0.0, 1e-6); +} + +TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Vectors for block and index ranges + std::vector<int> i_startblk_in(3, 0); + std::vector<int> 
i_endblk_in(3, nblks_c); + std::vector<int> i_startidx_in(3, 0); + std::vector<int> i_endidx_in(3, nproma); + + // Parameters for the test + int patch_id = 1; + bool l_limited_area = true; + bool l2fields = false; // Set to false for this test + + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialize with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + this->opt_in2[vec_e_at(i, k, 0)] = + real_distrib(gen); // Not used but initialize anyway + } + + // Set random edge indices + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = + 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + // Random average coefficients + for (int j = 0; j < 4; ++j) { + this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen); + } + + // Random initial values for div_vec_c and opt_out2 + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + this->opt_out2[div_vec_c_at(i, k, 0)] = + real_distrib(gen); // Not used but initialize anyway + } + } + + // Call the div_avg function with l2fields=false + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), 
this->geofac_div.data(), + this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), + this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], + this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values manually + std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, nblks_c)); + + // Step 1: Calculate aux_c (but not aux_c2 since l2fields=false) + for (int jb = i_startblk_in[0]; jb < i_endblk_in[0]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + aux_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Step 2: Assign aux_c to div_vec_c for patch_id > 0 (opt_out2 not updated + // since l2fields=false) + for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + 
aux_c[div_vec_c_at(jc, jk, jb)]; + } + } + } + + // Step 3: Perform averaging for the rest of the blocks (only for div_vec_c, + // not opt_out2) + for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, + i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + aux_c[div_vec_c_at(jc, jk, jb)] * + this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * + this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * + this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * + this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + } + } + } + + // Verify results - only check div_vec_c since l2fields=false means opt_out2 + // isn't updated + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "div_vec_c results differ at i=" << i << ", k=" << k; + } + } +} diff --git a/test/c/test_horizontal_recon.cpp b/test/c/test_horizontal_recon.cpp new file mode 100644 index 0000000..8938a10 --- /dev/null +++ b/test/c/test_horizontal_recon.cpp @@ -0,0 +1,1199 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// 
SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#include <iostream>
+#include <random>
+#include <stdexcept>
+#include <tuple>
+#include <vector>
+
+#include <Kokkos_Core.hpp>
+#include <gtest/gtest.h>
+#include <dim_helper.hpp>
+#include <horizontal/mo_lib_divrot.hpp>
+#include <support/mo_lib_loopindices.hpp>
+
+/// Reconstruction methods a fixture can be specialised for.
+enum class ReconstructionMethod {
+  linear,
+  quadratic,
+  cubic,
+};
+
+/// Base fixture for the horizontal reconstruction tests.
+///
+/// @tparam ValueType   element type of the floating-point buffers
+/// @tparam ReconMethod integer value of a ReconstructionMethod enumerator;
+///                     it selects lsq_dim_c and lsq_dim_unk at compile time
+///
+/// The fixture only provides dimensions, loop-range parameters and buffers
+/// resized to match those dimensions; the individual tests fill the buffers.
+template <typename ValueType, int ReconMethod>
+class HorizontalReconTest : public ::testing::Test {
+protected:
+  /// Maps a reconstruction method to its (lsq_dim_c, lsq_dim_unk) pair.
+  /// Used to initialise lsq_dim at compile time.
+  static constexpr std::tuple<int, int>
+  init_lsq_dim(ReconstructionMethod method) {
+    switch (method) {
+    case ReconstructionMethod::linear:
+      return std::make_tuple(3, 2);
+    case ReconstructionMethod::quadratic:
+      return std::make_tuple(9, 5);
+    case ReconstructionMethod::cubic:
+      return std::make_tuple(9, 9);
+    }
+    // ReconMethod is a plain int template argument, so a value that matches
+    // no enumerator can reach this point. Throwing avoids flowing off the end
+    // of a non-void function; inside the constant-evaluated initialiser of
+    // lsq_dim an invalid method therefore becomes a compile-time error.
+    throw std::invalid_argument("unknown ReconstructionMethod");
+  }
+
+  // Constant dimensions.
+  static constexpr int nproma = 3;  // inner loop length
+  static constexpr int nlev = 1;    // number of vertical levels
+  static constexpr int nblks_c = 1; // number of cell blocks
+  static constexpr std::tuple<int, int> lsq_dim =
+      init_lsq_dim(static_cast<ReconstructionMethod>(ReconMethod));
+  static constexpr int lsq_dim_c = std::get<0>(lsq_dim);
+  static constexpr int lsq_dim_unk = std::get<1>(lsq_dim);
+
+  // Parameter values.
+  int i_startblk = 0;
+  int i_endblk = nblks_c; // Test blocks [0 .. nblks_c-1]
+  int i_startidx_in = 0;
+  int i_endidx_in = nproma; // Full range: 0 .. nproma-1
+  int slev = 0;
+  int elev = nlev; // Full vertical range (0 .. nlev-1)
+  int patch_id = 0;
+  bool lacc = false; // Not using ACC-specific behavior.
+  bool acc_async = false; // No asynchronous execution.
+  bool l_consv = true; // With conservative correction.
+  bool l_limited_area = true; // Limited area setup
+
+  // Buffers. The extents listed are the arguments handed to dim_combine in
+  // the constructor below.
+  std::vector<ValueType> p_cc;             // (nproma, nlev, nblks_c)
+  std::vector<int> cell_neighbor_idx;      // (nproma, nblks_c, lsq_dim_c)
+  std::vector<int> cell_neighbor_blk;      // (nproma, nblks_c, lsq_dim_c)
+  std::vector<ValueType> lsq_qtmat_c;      // (nproma, lsq_dim_unk, lsq_dim_c, nblks_c)
+  std::vector<ValueType> lsq_rmat_rdiag_c; // (nproma, lsq_dim_unk, nblks_c)
+  std::vector<ValueType> lsq_rmat_utri_c;  // (nproma, (unk^2 - unk) / 2, nblks_c)
+  std::vector<ValueType> lsq_moments;      // (nproma, nblks_c, lsq_dim_unk)
+  std::vector<ValueType> lsq_pseudoinv;    // (nproma, lsq_dim_unk, lsq_dim_c, nblks_c)
+  std::vector<ValueType> p_coeff;          // (lsq_dim_unk + 1, nproma, nlev, nblks_c)
+
+  /// Resizes every buffer to the extent implied by the compile-time
+  /// dimensions; elements are value-initialised by std::vector::resize.
+  HorizontalReconTest() {
+    p_cc.resize(dim_combine(nproma, nlev, nblks_c));
+    cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, lsq_dim_c));
+    cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, lsq_dim_c));
+    lsq_qtmat_c.resize(dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c));
+    lsq_rmat_rdiag_c.resize(dim_combine(nproma, lsq_dim_unk, nblks_c));
+    // Entry count of the strictly upper triangle of a lsq_dim_unk x
+    // lsq_dim_unk matrix.
+    lsq_rmat_utri_c.resize(dim_combine(
+        nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c));
+    lsq_moments.resize(dim_combine(nproma, nblks_c, lsq_dim_unk));
+    lsq_pseudoinv.resize(dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c));
+    p_coeff.resize(dim_combine(lsq_dim_unk + 1, nproma, nlev, nblks_c));
+  }
+};
+
+/// Fixture for the horizontal reconstruction tests with the reconstruction
+/// method fixed to linear.
+template <typename ValueType>
+class HorizontalReconLinearTest
+    : public HorizontalReconTest<ValueType, static_cast<int>(
+                                                ReconstructionMethod::linear)> {
+};
+
+/// Fixture for the horizontal reconstruction tests with the reconstruction
+/// method fixed to quadratic.
+template <typename ValueType>
+class HorizontalReconQuadraticTest
+    : public HorizontalReconTest<
+          ValueType, static_cast<int>(ReconstructionMethod::quadratic)> {};
+
+/// Fixture for the horizontal reconstruction tests with the reconstruction
+/// method fixed to cubic.
+template <typename ValueType> +class HorizontalReconCubicTest + : public HorizontalReconTest<ValueType, static_cast<int>( + ReconstructionMethod::cubic)> { +}; + +/// ValueTypes which the divrot tests should run with +typedef ::testing::Types<float, double> ValueTypes; + +TYPED_TEST_SUITE(HorizontalReconLinearTest, ValueTypes); + +TYPED_TEST(HorizontalReconLinearTest, TestLsqCell) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; + this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.5; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = 2.0; + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = 2.0; + this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = 0.1; + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + } + + // Test function + recon_lsq_cell_l<TypeParam>( + 
this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.34, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 1.0, 1e-6); +} + +TYPED_TEST(HorizontalReconLinearTest, TestLsqCellRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 
0, j)] = 0; + this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = real_distrib(gen); + this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = real_distrib(gen); + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = real_distrib(gen); + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = real_distrib(gen); + this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = real_distrib(gen); + + this->lsq_moments[moments_at(i, 0, 0)] = real_distrib(gen); + this->lsq_moments[moments_at(i, 0, 1)] = real_distrib(gen); + } + + // Test function + recon_lsq_cell_l<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + z_qt_times_d[0] = 0.0; + z_qt_times_d[1] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + z_qt_times_d[0] += this->lsq_qtmat_c[qtmat_at(jc, 0, i, jb)] * 
z_d[i]; + z_qt_times_d[1] += this->lsq_qtmat_c[qtmat_at(jc, 1, i, jb)] * z_d[i]; + } + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] = + this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 1, jb)] * z_qt_times_d[1]; + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] = + this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 0, jb)] * + (z_qt_times_d[0] - + this->lsq_rmat_utri_c[rmat_utri_at(jc, 0, jb)] * + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)]); + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)] - + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] * + this->lsq_moments[moments_at(jc, jb, 0)] - + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] * + this->lsq_moments[moments_at(jc, jb, 1)]; + } + } + } + + // Check result + for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; + } + } +} + +TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVD) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + 
this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + } + + // Test function + recon_lsq_cell_l_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.65, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.0, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 0.5, 1e-6); +} + +TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVDRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for 
(int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = real_distrib(gen); + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = real_distrib(gen); + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + + this->lsq_moments[moments_at(i, 0, 0)] = real_distrib(gen); + this->lsq_moments[moments_at(i, 0, 1)] = real_distrib(gen); + } + + // Test function + recon_lsq_cell_l_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] = + this->lsq_pseudoinv[pseudoinv_at(jc, 1, 0, jb)] * z_d[0] + + this->lsq_pseudoinv[pseudoinv_at(jc, 1, 1, jb)] * z_d[1] + + this->lsq_pseudoinv[pseudoinv_at(jc, 1, 2, jb)] * z_d[2]; + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] = + 
this->lsq_pseudoinv[pseudoinv_at(jc, 0, 0, jb)] * z_d[0] + + this->lsq_pseudoinv[pseudoinv_at(jc, 0, 1, jb)] * z_d[1] + + this->lsq_pseudoinv[pseudoinv_at(jc, 0, 2, jb)] * z_d[2]; + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)] - + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] * + this->lsq_moments[moments_at(jc, jb, 0)] - + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] * + this->lsq_moments[moments_at(jc, jb, 1)]; + } + } + } + + // Check result + for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; + } + } +} + +TYPED_TEST_SUITE(HorizontalReconQuadraticTest, ValueTypes); + +TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCell) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 
j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; + this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.5; + this->lsq_qtmat_c[qtmat_at(i, 2, j, 0)] = 0.2; + this->lsq_qtmat_c[qtmat_at(i, 3, j, 0)] = 0.7; + this->lsq_qtmat_c[qtmat_at(i, 4, j, 0)] = 1.3; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = 2.0; + } + + for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { + this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = 1.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + } + + // Test function + recon_lsq_cell_q<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.24, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 3.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + -2.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 2.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + -3.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, 
nproma, nlev, nblks_c>(5, 0, 0, 0))], + 2.6, 1e-6); +} + +TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + for (int k = 0; k < lsq_dim_c; ++k) { + this->lsq_qtmat_c[qtmat_at(i, j, k, 0)] = real_distrib(gen); + } + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = real_distrib(gen); + this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + } + for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { + this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = real_distrib(gen); + } + + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + } + + // Test function + recon_lsq_cell_q<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), 
this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + for (int j = 0; j < lsq_dim_unk; ++j) { + z_qt_times_d[j] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + z_qt_times_d[j] += + this->lsq_qtmat_c[qtmat_at(jc, j, i, jb)] * z_d[i]; + } + } + int utri_id = 0; + for (int j = lsq_dim_unk; j > 0; --j) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = z_qt_times_d[j - 1]; + for (int k = j + 1; k <= lsq_dim_unk; ++k) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] -= + this->lsq_rmat_utri_c[rmat_utri_at(jc, utri_id++, jb)] * + p_result[at<lsq_dim_unk + 1, nproma>(k, jc)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] *= + this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, j - 1, jb)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)]; + for (int j = 0; j < lsq_dim_unk; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= + p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * + 
this->lsq_moments[moments_at(jc, jb, j)]; + } + } + } + } + + // Check result + for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; + } + } +} + +TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVD) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; + this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.2; + this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; + this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 1.3; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; 
+  }
+
+  // Test function
+  recon_lsq_cell_q_svd<TypeParam>(
+      this->p_cc.data(), this->cell_neighbor_idx.data(),
+      this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(),
+      this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk,
+      this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev,
+      this->elev, this->nproma, this->patch_id, this->l_limited_area,
+      this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk,
+      this->lsq_dim_c);
+
+  // Check result
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))],
+      -0.56, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))],
+      1.0, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))],
+      0.5, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))],
+      0.2, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))],
+      0.7, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))],
+      1.3, 1e-6);
+}
+
+/// Randomised check of recon_lsq_cell_q_svd: fills block 0 / level 0 with
+/// random inputs, runs the routine, and compares every coefficient against a
+/// plain-loop reference computed below (absolute tolerance 1e-5).
+TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int lsq_dim_c = this->lsq_dim_c;
+  constexpr int lsq_dim_unk = this->lsq_dim_unk;
+
+  // Only the index helpers this SVD variant actually reads are declared.
+  const auto &p_cc_at = at<nproma, nlev, nblks_c>;
+  const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>;
+  const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>;
+  const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>;
+  const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>;
+
+  // Seeded from std::random_device, so inputs differ from run to run.
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_int_distribution<int> int_distrib(0, nproma - 1);
+  std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0);
+
+  // Initialization is done only for iblk = 0 and ilev = 0
+  for (int i = 0; i < nproma; ++i) {
+    this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen);
+
+    for (int j = 0; j < lsq_dim_c; ++j) {
+      this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen);
+      this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0;
+    }
+
+    for (int j = 0; j < lsq_dim_unk; ++j) {
+      for (int k = 0; k < lsq_dim_c; ++k) {
+        this->lsq_pseudoinv[pseudoinv_at(i, j, k, 0)] = real_distrib(gen);
+      }
+      this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen);
+    }
+
+    // Pre-fill the output with random values so stale entries cannot pass.
+    for (int j = 0; j < lsq_dim_unk + 1; ++j) {
+      this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen);
+    }
+  }
+
+  // Test function
+  recon_lsq_cell_q_svd<TypeParam>(
+      this->p_cc.data(), this->cell_neighbor_idx.data(),
+      this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(),
+      this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk,
+      this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev,
+      this->elev, this->nproma, this->patch_id, this->l_limited_area,
+      this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk,
+      this->lsq_dim_c);
+
+  // Compute reference result
+  std::vector<TypeParam> z_d(lsq_dim_c);
+  std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma);
+  for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb,
+                      this->i_startblk, this->i_endblk, i_startidx, i_endidx);
+    for (int jk = this->slev; jk < this->elev; ++jk) {
+      for (int jc = i_startidx; jc < i_endidx; ++jc) {
+        // Differences between each stencil neighbour and the cell itself.
+        for (int i = 0; i < lsq_dim_c; ++i) {
+          z_d[i] = this->p_cc[p_cc_at(
+                       this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk,
+                       this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] -
+                   this->p_cc[p_cc_at(jc, jk, jb)];
+        }
+        // Coefficient j (j >= 1): pseudo-inverse row j - 1 applied to z_d.
+        for (int j = 1; j < lsq_dim_unk + 1; ++j) {
+          p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0;
+          for (int i = 0; i < lsq_dim_c; ++i) {
+            p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] +=
+                this->lsq_pseudoinv[pseudoinv_at(jc, j - 1, i, jb)] * z_d[i];
+          }
+        }
+        // Coefficient 0: cell value minus the moment-weighted coefficients.
+        p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] =
+            this->p_cc[p_cc_at(jc, jk, jb)];
+        for (int j = 0; j < lsq_dim_unk; ++j) {
+          p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -=
+              p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] *
+              this->lsq_moments[moments_at(jc, jb, j)];
+        }
+      }
+    }
+  }
+
+  // Check result
+  for (int j = 0; j < lsq_dim_unk + 1; ++j) {
+    for (int jc = 0; jc < nproma; ++jc) {
+      EXPECT_NEAR(this->p_coeff[(p_coeff_at(j, jc, 0, 0))],
+                  p_result[(at<lsq_dim_unk + 1, nproma>(j, jc))], 1e-5)
+          << "For loop result fails for j = " << j << ", jc = " << jc;
+    }
+  }
+}
+
+TYPED_TEST_SUITE(HorizontalReconCubicTest, ValueTypes);
+
+TYPED_TEST(HorizontalReconCubicTest, TestLsqCell) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int lsq_dim_c = this->lsq_dim_c;
+  constexpr int lsq_dim_unk = this->lsq_dim_unk;
+
+  const auto &p_cc_at = at<nproma, nlev, nblks_c>;
+  const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>;
+  const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>;
+  const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>;
+  const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>;
+  const auto &rmat_utri_at =
+      at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>;
+  const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>;
+
+  // Initialization
+  for (int i = 0; i < nproma; ++i) {
+    this->p_cc[p_cc_at(i, 0, 0)] = (i + 1);
+
+    this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma;
+    this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0;
+    for (int j = 1; j < lsq_dim_c; ++j) {
+      this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i;
+      this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0;
+    }
+
+    
for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; + this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.9; + this->lsq_qtmat_c[qtmat_at(i, 2, j, 0)] = 0.8; + this->lsq_qtmat_c[qtmat_at(i, 3, j, 0)] = 0.7; + this->lsq_qtmat_c[qtmat_at(i, 4, j, 0)] = 0.6; + this->lsq_qtmat_c[qtmat_at(i, 5, j, 0)] = 0.5; + this->lsq_qtmat_c[qtmat_at(i, 6, j, 0)] = 0.4; + this->lsq_qtmat_c[qtmat_at(i, 7, j, 0)] = 0.3; + this->lsq_qtmat_c[qtmat_at(i, 8, j, 0)] = 0.2; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = 2.0; + } + + for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { + this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = 1.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + this->lsq_moments[moments_at(i, 0, 5)] = 0.7; + this->lsq_moments[moments_at(i, 0, 6)] = 0.8; + this->lsq_moments[moments_at(i, 0, 7)] = 0.9; + this->lsq_moments[moments_at(i, 0, 8)] = 1.0; + } + + // Test function + recon_lsq_cell_c<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.28, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 0.4, 1e-6); 
+ EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + -0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + -0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], + -0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], + -0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], + 0.4, 1e-6); +} + +TYPED_TEST(HorizontalReconCubicTest, TestLsqCellRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = 
int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + for (int k = 0; k < lsq_dim_c; ++k) { + this->lsq_qtmat_c[qtmat_at(i, j, k, 0)] = real_distrib(gen); + } + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = real_distrib(gen); + this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + } + for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { + this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = real_distrib(gen); + } + + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + } + + // Test function + recon_lsq_cell_c<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + for (int j = 0; j < lsq_dim_unk; ++j) { + z_qt_times_d[j] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + 
z_qt_times_d[j] += + this->lsq_qtmat_c[qtmat_at(jc, j, i, jb)] * z_d[i]; + } + } + int utri_id = 0; + for (int j = lsq_dim_unk; j > 0; --j) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = z_qt_times_d[j - 1]; + for (int k = j + 1; k <= lsq_dim_unk; ++k) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] -= + this->lsq_rmat_utri_c[rmat_utri_at(jc, utri_id++, jb)] * + p_result[at<lsq_dim_unk + 1, nproma>(k, jc)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] *= + this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, j - 1, jb)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)]; + for (int j = 0; j < lsq_dim_unk; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= + p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * + this->lsq_moments[moments_at(jc, jb, j)]; + } + } + } + } + + // Check result + for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; + } + } +} + +TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVD) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + 
this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.9; + this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.8; + this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; + this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 0.6; + this->lsq_pseudoinv[pseudoinv_at(i, 5, j, 0)] = 0.5; + this->lsq_pseudoinv[pseudoinv_at(i, 6, j, 0)] = 0.4; + this->lsq_pseudoinv[pseudoinv_at(i, 7, j, 0)] = 0.3; + this->lsq_pseudoinv[pseudoinv_at(i, 8, j, 0)] = 0.2; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + this->lsq_moments[moments_at(i, 0, 5)] = 0.7; + this->lsq_moments[moments_at(i, 0, 6)] = 0.8; + this->lsq_moments[moments_at(i, 0, 7)] = 0.9; + this->lsq_moments[moments_at(i, 0, 8)] = 1.0; + } + + // Test function + recon_lsq_cell_c_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + -1.64, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.0, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 0.9, 1e-6); + EXPECT_NEAR( + 
this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 0.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + 0.7, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 0.6, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], + 0.5, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], + 0.3, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], + 0.2, 1e-6); +} + +TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + for (int k = 0; k < lsq_dim_c; ++k) { + this->lsq_pseudoinv[pseudoinv_at(i, j, k, 0)] = real_distrib(gen); + } + this->lsq_moments[moments_at(i, 0, j)] = 
real_distrib(gen); + } + + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + } + + // Test function + recon_lsq_cell_c_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + for (int j = 1; j < lsq_dim_unk + 1; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] += + this->lsq_pseudoinv[pseudoinv_at(jc, j - 1, i, jb)] * z_d[i]; + } + } + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)]; + for (int j = 0; j < lsq_dim_unk; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= + p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * + this->lsq_moments[moments_at(jc, jb, j)]; + } + } + } + } + // Check result + for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + 
EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; + } + } +} diff --git a/test/c/test_horizontal_rot.cpp b/test/c/test_horizontal_rot.cpp new file mode 100644 index 0000000..68e8024 --- /dev/null +++ b/test/c/test_horizontal_rot.cpp @@ -0,0 +1,378 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include <iostream> +#include <random> +#include <vector> + +#include <Kokkos_Core.hpp> +#include <gtest/gtest.h> +#include <dim_helper.hpp> +#include <horizontal/mo_lib_divrot.hpp> +#include <support/mo_lib_loopindices.hpp> + +/// Test class for the horizontal rotation tests. Templated for the ValueType. +template <typename ValueType> +class HorizontalRotVertexTest : public ::testing::Test { +protected: + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 2; // number of vertical levels + static constexpr int nblks_e = 1; // number of edge blocks + static constexpr int nblks_v = 1; // number of vertex blocks + static constexpr int dim4d = 2; // 4th dimension size + + int i_startblk = 0; + int i_endblk = nblks_v; // Test blocks [0 .. nblks_v-1] + int i_startidx_in = 0; + int i_endidx_in = nproma; // Full range: 0 .. nproma-1 + std::vector<int> slev; + std::vector<int> elev; + bool lacc = false; // Not using ACC-specific behavior. + bool acc_async = false; // Not using ACC-specific behavior. 
+ + std::vector<ValueType> vec_e; + std::vector<int> vert_edge_idx; + std::vector<int> vert_edge_blk; + std::vector<ValueType> geofac_rot; + std::vector<ValueType> rot_vec; + std::vector<ValueType> f4din; + std::vector<ValueType> f4dout; + + HorizontalRotVertexTest() { + slev.resize(dim4d, 0); + elev.resize(dim4d, nlev); // Full vertical range (0 .. nlev-1) + + vec_e.resize(dim_combine(nproma, nlev, nblks_e)); + vert_edge_idx.resize(dim_combine(nproma, nblks_v, 6)); + vert_edge_blk.resize(dim_combine(nproma, nblks_v, 6)); + geofac_rot.resize(dim_combine(nproma, 6, nblks_v)); + rot_vec.resize(dim_combine(nproma, nlev, nblks_v)); + f4din.resize(dim_combine(nproma, nlev, nblks_e, dim4d)); + f4dout.resize(dim_combine(nproma, nlev, nblks_v, dim4d)); + } +}; + +/// ValueTypes which the divrot tests should run with +typedef ::testing::Types<float, double> ValueTypes; + +TYPED_TEST_SUITE(HorizontalRotVertexTest, ValueTypes); + +TYPED_TEST(HorizontalRotVertexTest, TestRotVertexAtmosSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &vert_edge_at = at<nproma, nblks_v, 6>; + const auto &geofac_rot_at = at<nproma, 6, nblks_v>; + const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + + // Initialization with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + } + + // Set edge indices to point to specific edges + for (int j = 0; j < 6; ++j) { + this->vert_edge_idx[vert_edge_at(i, 0, j)] = (i + j) % nproma; + // All edges are in the same block for this test + this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; + } + + // Geometric factors for rotation + this->geofac_rot[geofac_rot_at(i, 0, 0)] = 0.3; + this->geofac_rot[geofac_rot_at(i, 1, 0)] = 0.2; + this->geofac_rot[geofac_rot_at(i, 2, 0)] = 
0.1; + this->geofac_rot[geofac_rot_at(i, 3, 0)] = 0.2; + this->geofac_rot[geofac_rot_at(i, 4, 0)] = 0.1; + this->geofac_rot[geofac_rot_at(i, 5, 0)] = 0.1; + + // Initialize rot_vec to zero + for (int k = 0; k < nlev; ++k) { + this->rot_vec[rot_vec_at(i, k, 0)] = 0.0; + } + } + + // Call the rot_vertex_atmos function + rot_vertex_atmos<TypeParam>( + this->vec_e.data(), this->vert_edge_idx.data(), + this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev[0], this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_e, this->nblks_v); + + // Expected values based on the initialization pattern + EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 1, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 0, 0)], 2.1, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 1, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 0, 0)], 2.2, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 1, 0)], 4.4, 1e-6); +} + +TYPED_TEST(HorizontalRotVertexTest, TestRotVertexAtmosRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &vert_edge_at = at<nproma, nblks_v, 6>; + const auto &geofac_rot_at = at<nproma, 6, nblks_v>; + const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialization with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 6; ++j) { + 
this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen); + this->vert_edge_blk[vert_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 6; ++j) { + this->geofac_rot[geofac_rot_at(i, j, 0)] = real_distrib(gen); + } + + // Initialize rot_vec to random values + for (int k = 0; k < nlev; ++k) { + this->rot_vec[rot_vec_at(i, k, 0)] = real_distrib(gen); + } + } + + // Call the rot_vertex_atmos function + rot_vertex_atmos<TypeParam>( + this->vec_e.data(), this->vert_edge_idx.data(), + this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev[0], this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_e, this->nblks_v); + + // Calculate reference values separately and verify results + std::vector<TypeParam> ref_rot_vec(nproma * nlev * nblks_v, 0.0); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jv = i_startidx; jv < i_endidx; ++jv) { + ref_rot_vec[rot_vec_at(jv, jk, jb)] = + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * + this->geofac_rot[geofac_rot_at(jv, 0, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * + this->geofac_rot[geofac_rot_at(jv, 1, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * + this->geofac_rot[geofac_rot_at(jv, 2, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * + 
this->geofac_rot[geofac_rot_at(jv, 3, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * + this->geofac_rot[geofac_rot_at(jv, 4, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * + this->geofac_rot[geofac_rot_at(jv, 5, jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->rot_vec[rot_vec_at(i, k, 0)], + ref_rot_vec[rot_vec_at(i, k, 0)], 1e-5) + << "Results differ at i=" << i << ", k=" << k; + } + } +} + +TYPED_TEST_SUITE(HorizontalRotVertexTest, ValueTypes); + +TYPED_TEST(HorizontalRotVertexTest, TestRotVertexRISpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &vert_edge_at = at<nproma, nblks_v, 6>; + const auto &geofac_rot_at = at<nproma, 6, nblks_v>; + const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + + // Initialization with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + } + + // Set edge indices to point to specific edges + for (int j = 0; j < 6; ++j) { + this->vert_edge_idx[vert_edge_at(i, 0, j)] = (i + j) % nproma; + // All edges are in the same block for this test + this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; + } + + // Geometric factors for rotation + this->geofac_rot[geofac_rot_at(i, 0, 0)] = 0.3; + this->geofac_rot[geofac_rot_at(i, 1, 0)] = 0.2; + this->geofac_rot[geofac_rot_at(i, 2, 0)] = 0.1; + this->geofac_rot[geofac_rot_at(i, 3, 0)] = 0.2; + this->geofac_rot[geofac_rot_at(i, 4, 0)] = 0.1; + this->geofac_rot[geofac_rot_at(i, 5, 0)] = 0.1; + + // Initialize rot_vec to zero + for (int k = 0; k < nlev; ++k) { + 
this->rot_vec[rot_vec_at(i, k, 0)] = 0.0; + } + } + + // Call the rot_vertex_ri function + rot_vertex_ri<TypeParam>( + this->vec_e.data(), this->vert_edge_idx.data(), + this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev[0], this->elev[0], this->nproma, this->lacc, this->acc_async, + this->nlev, this->nblks_e, this->nblks_v); + + // Expected values based on the initialization pattern + EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 1, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 0, 0)], 2.1, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 1, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 0, 0)], 2.2, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 1, 0)], 4.4, 1e-6); +} + +TYPED_TEST(HorizontalRotVertexTest, TestRotVertexRIRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &vert_edge_at = at<nproma, nblks_v, 6>; + const auto &geofac_rot_at = at<nproma, 6, nblks_v>; + const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialization with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 6; ++j) { + this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen); + this->vert_edge_blk[vert_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 6; ++j) { + 
this->geofac_rot[geofac_rot_at(i, j, 0)] = real_distrib(gen); + } + + // Initialize rot_vec to random values + for (int k = 0; k < nlev; ++k) { + this->rot_vec[rot_vec_at(i, k, 0)] = real_distrib(gen); + } + } + + // Call the rot_vertex_ri function + rot_vertex_ri<TypeParam>( + this->vec_e.data(), this->vert_edge_idx.data(), + this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev[0], this->elev[0], this->nproma, this->lacc, this->acc_async, + this->nlev, this->nblks_e, this->nblks_v); + + // Ensure computation is complete for both modes + Kokkos::fence(); + + // Calculate reference values separately and verify results + std::vector<TypeParam> ref_rot_vec(nproma * nlev * nblks_v, 0.0); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jv = i_startidx; jv < i_endidx; ++jv) { + ref_rot_vec[rot_vec_at(jv, jk, jb)] = + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * + this->geofac_rot[geofac_rot_at(jv, 0, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * + this->geofac_rot[geofac_rot_at(jv, 1, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * + this->geofac_rot[geofac_rot_at(jv, 2, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * + this->geofac_rot[geofac_rot_at(jv, 3, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 
4)])] * + this->geofac_rot[geofac_rot_at(jv, 4, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * + this->geofac_rot[geofac_rot_at(jv, 5, jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->rot_vec[rot_vec_at(i, k, 0)], + ref_rot_vec[rot_vec_at(i, k, 0)], 1e-5) + << "Results differ at i=" << i << ", k=" << k << ")"; + } + } +} + -- GitLab From c880792ed7aab8ad9948058318ba53fa48d1ea4b Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Mon, 17 Mar 2025 15:11:54 +0000 Subject: [PATCH 41/76] Update googletest version (icon-libraries/libiconmath!44) ## What is the new feature Update googletest version. The old v1.12.1 uses a deprecated CMake minimum version. ## How is it implemented Update the download version in FetchContent Merged-by: Pradipta Samanta <samanta@dkrz.de> Changelog: feature --- test/c/CMakeLists.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index 98a21b2..90ab1e3 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -10,13 +10,18 @@ # --------------------------------------------------------------- # Fetch GoogleTest via FetchContent +message(CHECK_START "Fetching external googletest") +if("${CMAKE_VERSION}" VERSION_GREATER_EQUAL "3.24") + cmake_policy(SET CMP0135 NEW) +endif() include(FetchContent) FetchContent_Declare( googletest - URL https://github.com/google/googletest/archive/refs/tags/release-1.12.1.zip + URL https://github.com/google/googletest/releases/download/v1.16.0/googletest-1.16.0.tar.gz ) # set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) FetchContent_MakeAvailable(googletest) +message(CHECK_PASS "done") # Find Kokkos (or use your existing Kokkos installation) # find_package(Kokkos REQUIRED) -- GitLab From 739452a412e39e5b9098721233a0d446e374611c Mon Sep 17 00:00:00 
2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Mon, 24 Feb 2025 15:59:28 +0100 Subject: [PATCH 42/76] Add cpp implementations (untested) --- src/horizontal/CMakeLists.txt | 4 + src/horizontal/lib_divrot.cpp | 328 ++++++++++++++++++++++++++++++ src/horizontal/lib_divrot.hpp | 42 ++++ test/c/test_horizontal_divrot.cpp | 0 4 files changed, 374 insertions(+) create mode 100644 src/horizontal/lib_divrot.cpp create mode 100644 src/horizontal/lib_divrot.hpp create mode 100644 test/c/test_horizontal_divrot.cpp diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt index f3b75c0..75916bc 100644 --- a/src/horizontal/CMakeLists.txt +++ b/src/horizontal/CMakeLists.txt @@ -59,6 +59,10 @@ target_include_directories( # multiple compile languages # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${PROJECT_SOURCE_DIR}/src>> +<<<<<<< HEAD +======= + $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_SOURCE_DIR}>> +>>>>>>> 670c30e (Add cpp implementations (untested)) PRIVATE # Path to config.h (for C and C++ only): Requires CMake 3.15+ for multiple # compile languages diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp new file mode 100644 index 0000000..9dd698c --- /dev/null +++ b/src/horizontal/lib_divrot.cpp @@ -0,0 +1,328 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include <lib_divrot.hpp> +#include <support/mo_lib_loopindices.hpp> +#include <vector> + +template <typename T> +void recon_lsq_cell_l_(const T *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const T *lsq_qtmat_c, + 
const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, + const T *lsq_moments, T &p_coeff, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, bool l_consv, bool lacc, + bool acc_async, int nblks_c, int lsq_dim_unk, + int lsq_dim_c) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedT3D z_d(3); + UnmanagedT3D z_qt_times_d(2); + + UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, 3); + + UnmanagedConstT3D p_cc_view(p_cc); + UnmanagedT3D p_coeff_view(p_coeff); + + UnmanagedConstT3D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, + lsq_dim_c, nblks_c); + UnmanagedConstT3D lsq_rmat_rdiag_c_view(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, + nblks_c); + UnmanagedConstT3D lsq_rmat_utri_c_view( + lsq_rmat_utri_c, nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, + nblks_c); + UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); + + for (int jb = i_startblk; jb <= i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + Kokkos::parallel_for( + "recon_lsq_cell_l_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_d(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_d(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + z_d(3) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + p_cc_view(jc, jk, jb); + // matrix multiplication Q^T d 
(partitioned into 2 dot products) + z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1) + + lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2) + + lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3); + z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1) + + lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2) + + lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3); + + p_coeff_view(3, jc, jk, jb) = + lsq_rmat_rdiag_c_view(jc, 2, jb) * z_qt_times_d(2); + p_coeff_view(2, jc, jk, jb) = + lsq_rmat_rdiag_c_view(jc, 1, jb) * + (z_qt_times_d(1) - + lsq_rmat_utri_c_view(jc, 1, jb) * p_coeff_view(3, jc, jk, jb)); + p_coeff_view(1, jc, jk, jb) = p_cc_view(jc, jk, jb); + }); + if (l_consv) { + Kokkos::parallel_for( + "recon_lsq_cell_l_consv", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + p_coeff_view(1, jc, jk, jb) = + p_coeff_view(1, jc, jk, jb) - + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - + p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2); + }); + } + } + + if (!acc_async) + Kokkos::fence(); +} + +template <typename T> +void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const T *lsq_pseudoinv, + const T *lsq_moments, T &p_coeff, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, bool l_consv, + bool lacc, bool acc_async, int nblks_c, + int lsq_dim_unk, int lsq_dim_c) { + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedT3D z_b(3); + + UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, 3); + + UnmanagedConstT3D p_cc_view(p_cc); + UnmanagedT3D p_coeff_view(p_coeff); + + UnmanagedConstT3D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, + lsq_dim_c, nblks_c); + UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); + + for (int jb = i_startblk; jb <= i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + Kokkos::parallel_for( + "recon_lsq_cell_l_svd_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_b(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_b(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + z_b(3) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + p_cc_view(jc, jk, jb); + + p_coeff_view(3, jc, jk, jb) = + lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3); + p_coeff_view(2, jc, jk, jb) = + lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3); + p_coeff_view(1, jc, jk, jb) = p_cc_view(jc, jk, jb); + }); + if (l_consv) { + Kokkos::parallel_for( + "recon_lsq_cell_l_svd_consv", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + p_coeff_view(1, jc, jk, jb) = + p_coeff_view(1, jc, jk, jb) - + 
p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - + p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2); + }); + } + } + + if (!acc_async) + Kokkos::fence(); +} + +template <typename T> +void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, + const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + int patch_id, int lsq_high_set_dim_c, bool l_limited_area, + bool lacc, int nblks_c, int lsq_dim_unk, int lsq_dim_c) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedT3D z_d(lsq_high_set_dim_c, nproma, elev); + UnmanagedT3D z_qt_times_d(5); + + UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); + UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); + + UnmanagedConstT3D p_cc_view(p_cc); + UnmanagedT3D p_coeff_view(p_coeff); + + UnmanagedConstT3D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, + lsq_dim_c, nblks_c); + UnmanagedConstT3D ptr_rrdiag(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, nblks_c); + UnmanagedConstT3D ptr_rutri(lsq_rmat_utri_c, nproma, + (lsq_dim_unk ^ 2 - lsq_dim_unk) / 2, nblks_c); + UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); + + if (patch_id > 1 || l_limited_area) { + Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( + {1, i_startidx_in, slev, i_startblk}, + {7, i_endidx_in + 1, elev + 1, i_endblk + 1}); + Kokkos::parallel_for( + "recon_lsq_cell_q_init", initPolicy, + KOKKOS_LAMBDA(const int z, const int jc, const int jk, const int jb) { + p_coeff_view(z, jc, jk, jb) = 0; + }); + } + 
+ for (int jb = i_startblk; jb <= i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + Kokkos::parallel_for( + "recon_lsq_cell_q_step1", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_d(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_d(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + z_d(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + p_cc_view(jc, jk, jb); + z_d(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - + p_cc_view(jc, jk, jb); + z_d(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - + p_cc_view(jc, jk, jb); + z_d(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - + p_cc_view(jc, jk, jb); + z_d(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - + p_cc_view(jc, jk, jb); + z_d(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - + p_cc_view(jc, jk, jb); + z_d(9, jc, jk) = p_cc_view(iidx(jc, jb, 9), jk, iblk(jc, jb, 9)) - + p_cc_view(jc, jk, jb); + }); + Kokkos::parallel_for( + "recon_lsq_cell_q_step2", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk) + + lsq_qtmat_c_view(jc, 1, 9, jb) * z_d(9, jc, jk); + z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 2, 
3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk) + + lsq_qtmat_c_view(jc, 2, 9, jb) * z_d(9, jc, jk); + z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk) + + lsq_qtmat_c_view(jc, 3, 9, jb) * z_d(9, jc, jk); + z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk) + + lsq_qtmat_c_view(jc, 4, 9, jb) * z_d(9, jc, jk); + z_qt_times_d(5) = lsq_qtmat_c_view(jc, 5, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 5, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 5, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 5, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 5, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 5, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 5, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 5, 8, jb) * z_d(8, jc, jk) + + lsq_qtmat_c_view(jc, 5, 9, jb) * z_d(9, jc, jk); + + p_coeff_view(6, jc, jk, jb) = ptr_rrdiag(jc, 5, jb) * z_qt_times_d(5); + p_coeff_view(5, jc, jk, jb) = + ptr_rrdiag(jc, 4, jb) * + (z_qt_times_d(4) - + ptr_rutri(jc, 1, jb) * p_coeff_view(6, jc, jk, 
jb)); + p_coeff_view(4, jc, jk, jb) = + ptr_rrdiag(jc, 3, jb) * + (z_qt_times_d(3) - + ptr_rutri(jc, 2, jb) * p_coeff_view(5, jc, jk, jb) - + ptr_rutri(jc, 3, jb) * p_coeff_view(6, jc, jk, jb)); + p_coeff_view(3, jc, jk, jb) = + ptr_rrdiag(jc, 2, jb) * + (z_qt_times_d(2) - + ptr_rutri(jc, 4, jb) * p_coeff_view(4, jc, jk, jb) - + ptr_rutri(jc, 5, jb) * p_coeff_view(5, jc, jk, jb) - + ptr_rutri(jc, 6, jb) * p_coeff_view(6, jc, jk, jb)); + p_coeff_view(2, jc, jk, jb) = + ptr_rrdiag(jc, 1, jb) * + (z_qt_times_d(1) - + ptr_rutri(jc, 7, jb) * p_coeff_view(3, jc, jk, jb) - + ptr_rutri(jc, 8, jb) * p_coeff_view(4, jc, jk, jb) - + ptr_rutri(jc, 9, jb) * p_coeff_view(5, jc, jk, jb) - + ptr_rutri(jc, 10, jb) * p_coeff_view(6, jc, jk, jb)); + p_coeff_view(1, jc, jk, jb) = + p_cc(jc, jk, jb) - + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - + p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) - + p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) - + p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4) - + p_coeff_view(6, jc, jk, jb) * lsq_moments_view(jc, jb, 5); + }); + } + + Kokkos::fence(); +} diff --git a/src/horizontal/lib_divrot.hpp b/src/horizontal/lib_divrot.hpp new file mode 100644 index 0000000..6977e5d --- /dev/null +++ b/src/horizontal/lib_divrot.hpp @@ -0,0 +1,42 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#pragma once + +#include <Kokkos_Core.hpp> + +template <typename T> +void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const T *lsq_qtmat_c, + const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, + const T *lsq_moments, T &p_coeff, int 
i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, bool l_consv, bool lacc, + bool acc_async, int nblks_c, int lsq_dim_unk, + int lsq_dim_c); + +template <typename T> +void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const T *lsq_pseudoinv, + const T *lsq_moments, T &p_coeff, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, bool l_consv, + bool lacc, bool acc_async, int nblks_c, + int lsq_dim_unk, int lsq_dim_c); + +template <typename T> +void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, + const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + int patch_id, int lsq_high_set_dim_c, bool l_limited_area, + bool lacc, int nblks_c, int lsq_dim_unk, int lsq_dim_c); diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp new file mode 100644 index 0000000..e69de29 -- GitLab From fcef4c9f20f29b7fdd4dcbaf4a62e0eaca2f9d97 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Mon, 24 Feb 2025 22:25:34 +0100 Subject: [PATCH 43/76] Fix indexes --- src/horizontal/lib_divrot.cpp | 162 +++++++++++++++++----------------- 1 file changed, 81 insertions(+), 81 deletions(-) diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp index 9dd698c..addd485 100644 --- a/src/horizontal/lib_divrot.cpp +++ b/src/horizontal/lib_divrot.cpp @@ -49,46 +49,46 @@ void recon_lsq_cell_l_(const T *p_cc, const int *cell_neighbor_idx, nblks_c); UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); - for (int jb = i_startblk; jb <= i_endblk; ++jb) { + for (int jb = i_startblk; jb < i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, 
jb, i_startblk, i_endblk, i_startidx, i_endidx); - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); Kokkos::parallel_for( "recon_lsq_cell_l_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { + z_d(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); z_d(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - p_cc_view(jc, jk, jb); z_d(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - p_cc_view(jc, jk, jb); - z_d(3) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - - p_cc_view(jc, jk, jb); // matrix multiplication Q^T d (partitioned into 2 dot products) - z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1) + - lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2) + - lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3); - z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1) + - lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2) + - lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3); + z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0) + + lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1) + + lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2); + z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0) + + lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1) + + lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2); - p_coeff_view(3, jc, jk, jb) = - lsq_rmat_rdiag_c_view(jc, 2, jb) * z_qt_times_d(2); p_coeff_view(2, jc, jk, jb) = - lsq_rmat_rdiag_c_view(jc, 1, jb) * - (z_qt_times_d(1) - - lsq_rmat_utri_c_view(jc, 1, jb) * p_coeff_view(3, jc, jk, jb)); - p_coeff_view(1, jc, jk, jb) = p_cc_view(jc, jk, jb); + lsq_rmat_rdiag_c_view(jc, 1, jb) * z_qt_times_d(1); + p_coeff_view(1, jc, jk, jb) = + lsq_rmat_rdiag_c_view(jc, 0, jb) * + (z_qt_times_d(0) - + lsq_rmat_utri_c_view(jc, 0, jb) * p_coeff_view(2, jc, jk, jb)); + p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb); }); if (l_consv) { Kokkos::parallel_for( "recon_lsq_cell_l_consv", innerPolicy, 
KOKKOS_LAMBDA(const int jk, const int jc) { - p_coeff_view(1, jc, jk, jb) = - p_coeff_view(1, jc, jk, jb) - - p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - - p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2); + p_coeff_view(0, jc, jk, jb) = + p_coeff_view(0, jc, jk, jb) - + p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1); }); } } @@ -126,41 +126,41 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, lsq_dim_c, nblks_c); UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); - for (int jb = i_startblk; jb <= i_endblk; ++jb) { + for (int jb = i_startblk; jb < i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); Kokkos::parallel_for( "recon_lsq_cell_l_svd_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { + z_b(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); z_b(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - p_cc_view(jc, jk, jb); z_b(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - p_cc_view(jc, jk, jb); - z_b(3) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - - p_cc_view(jc, jk, jb); - p_coeff_view(3, jc, jk, jb) = - lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3); p_coeff_view(2, jc, jk, jb) = + lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0) + lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3); - p_coeff_view(1, jc, jk, jb) = p_cc_view(jc, jk, jb); + lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2); + p_coeff_view(1, jc, jk, 
jb) = + lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2); + p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb); }); if (l_consv) { Kokkos::parallel_for( "recon_lsq_cell_l_svd_consv", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - p_coeff_view(1, jc, jk, jb) = - p_coeff_view(1, jc, jk, jb) - - p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - - p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2); + p_coeff_view(0, jc, jk, jb) = + p_coeff_view(0, jc, jk, jb) - + p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1); }); } } @@ -199,13 +199,13 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, lsq_dim_c, nblks_c); UnmanagedConstT3D ptr_rrdiag(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, nblks_c); UnmanagedConstT3D ptr_rutri(lsq_rmat_utri_c, nproma, - (lsq_dim_unk ^ 2 - lsq_dim_unk) / 2, nblks_c); + (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, + nblks_c); UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); if (patch_id > 1 || l_limited_area) { Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( - {1, i_startidx_in, slev, i_startblk}, - {7, i_endidx_in + 1, elev + 1, i_endblk + 1}); + {0, i_startidx_in, slev, i_startblk}, {6, i_endidx_in, elev, i_endblk}); Kokkos::parallel_for( "recon_lsq_cell_q_init", initPolicy, KOKKOS_LAMBDA(const int z, const int jc, const int jk, const int jb) { @@ -213,16 +213,18 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, }); } - for (int jb = i_startblk; jb <= i_endblk; ++jb) { + for (int jb = i_startblk; jb < i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); + 
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); Kokkos::parallel_for( "recon_lsq_cell_q_step1", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { + z_d(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); z_d(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - p_cc_view(jc, jk, jb); z_d(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - @@ -239,88 +241,86 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, p_cc_view(jc, jk, jb); z_d(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - p_cc_view(jc, jk, jb); - z_d(9, jc, jk) = p_cc_view(iidx(jc, jb, 9), jk, iblk(jc, jb, 9)) - - p_cc_view(jc, jk, jb); }); Kokkos::parallel_for( "recon_lsq_cell_q_step2", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) + + z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) + lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) + lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) + lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) + lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) + lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) + lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk) + - lsq_qtmat_c_view(jc, 1, 9, jb) * z_d(9, jc, jk); - z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 1, jb) * 
z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) + lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) + lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) + lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) + lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) + lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) + lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk) + - lsq_qtmat_c_view(jc, 2, 9, jb) * z_d(9, jc, jk); - z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) + lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) + lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) + lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) + lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) + lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) + lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk) + - lsq_qtmat_c_view(jc, 3, 9, jb) * z_d(9, jc, jk); - z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) + lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) + lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) + lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) + lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) + lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) + lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk) + - lsq_qtmat_c_view(jc, 4, 9, jb) * z_d(9, jc, jk); - z_qt_times_d(5) = lsq_qtmat_c_view(jc, 5, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 5, 2, jb) * z_d(2, 
jc, jk) + - lsq_qtmat_c_view(jc, 5, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 5, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 5, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 5, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 5, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 5, 8, jb) * z_d(8, jc, jk) + - lsq_qtmat_c_view(jc, 5, 9, jb) * z_d(9, jc, jk); + lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk); - p_coeff_view(6, jc, jk, jb) = ptr_rrdiag(jc, 5, jb) * z_qt_times_d(5); - p_coeff_view(5, jc, jk, jb) = - ptr_rrdiag(jc, 4, jb) * - (z_qt_times_d(4) - - ptr_rutri(jc, 1, jb) * p_coeff_view(6, jc, jk, jb)); + p_coeff_view(5, jc, jk, jb) = ptr_rrdiag(jc, 4, jb) * z_qt_times_d(4); p_coeff_view(4, jc, jk, jb) = ptr_rrdiag(jc, 3, jb) * (z_qt_times_d(3) - - ptr_rutri(jc, 2, jb) * p_coeff_view(5, jc, jk, jb) - - ptr_rutri(jc, 3, jb) * p_coeff_view(6, jc, jk, jb)); + ptr_rutri(jc, 0, jb) * p_coeff_view(5, jc, jk, jb)); p_coeff_view(3, jc, jk, jb) = ptr_rrdiag(jc, 2, jb) * (z_qt_times_d(2) - - ptr_rutri(jc, 4, jb) * p_coeff_view(4, jc, jk, jb) - - ptr_rutri(jc, 5, jb) * p_coeff_view(5, jc, jk, jb) - - ptr_rutri(jc, 6, jb) * p_coeff_view(6, jc, jk, jb)); + ptr_rutri(jc, 1, jb) * p_coeff_view(4, jc, jk, jb) - + ptr_rutri(jc, 2, jb) * p_coeff_view(5, jc, jk, jb)); p_coeff_view(2, jc, jk, jb) = ptr_rrdiag(jc, 1, jb) * (z_qt_times_d(1) - + ptr_rutri(jc, 3, jb) * p_coeff_view(3, jc, jk, jb) - + ptr_rutri(jc, 4, jb) * p_coeff_view(4, jc, jk, jb) - + ptr_rutri(jc, 5, jb) * p_coeff_view(5, jc, jk, jb)); + p_coeff_view(1, jc, jk, jb) = + ptr_rrdiag(jc, 0, jb) * + (z_qt_times_d(0) - + ptr_rutri(jc, 6, jb) * p_coeff_view(2, jc, jk, jb) - ptr_rutri(jc, 7, jb) * p_coeff_view(3, jc, jk, jb) - ptr_rutri(jc, 8, jb) * p_coeff_view(4, jc, jk, jb) - - ptr_rutri(jc, 9, jb) * p_coeff_view(5, jc, jk, jb) - - ptr_rutri(jc, 10, jb) * p_coeff_view(6, jc, jk, jb)); - p_coeff_view(1, jc, jk, jb) = + ptr_rutri(jc, 9, jb) * p_coeff_view(5, jc, jk, jb)); + p_coeff_view(0, jc, jk, jb) = 
p_cc(jc, jk, jb) - + p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) - p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) - - p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4) - - p_coeff_view(6, jc, jk, jb) * lsq_moments_view(jc, jb, 5); + p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4); }); } -- GitLab From bcd3a6578b6740077ae6a9167a7d137d106508c0 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Tue, 25 Feb 2025 14:18:33 +0100 Subject: [PATCH 44/76] Add cpp implementations (untested) --- src/horizontal/lib_divrot.cpp | 617 +++++++++++++++++++++++++++++++++- src/horizontal/lib_divrot.hpp | 29 ++ 2 files changed, 630 insertions(+), 16 deletions(-) diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp index addd485..5b51d98 100644 --- a/src/horizontal/lib_divrot.cpp +++ b/src/horizontal/lib_divrot.cpp @@ -25,22 +25,27 @@ void recon_lsq_cell_l_(const T *p_cc, const int *cell_neighbor_idx, // Wrap raw pointers in unmanaged Kokkos Views. 
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T *, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT1D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - UnmanagedT3D z_d(3); - UnmanagedT3D z_qt_times_d(2); + UnmanagedT1D z_d(3); + UnmanagedT1D z_qt_times_d(2); UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, 3); UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, 3); UnmanagedConstT3D p_cc_view(p_cc); - UnmanagedT3D p_coeff_view(p_coeff); + UnmanagedT4D p_coeff_view(p_coeff); - UnmanagedConstT3D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, + UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); UnmanagedConstT3D lsq_rmat_rdiag_c_view(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, nblks_c); @@ -108,21 +113,26 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, // Wrap raw pointers in unmanaged Kokkos Views. 
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T *, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT1D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - UnmanagedT3D z_b(3); + UnmanagedT1D z_b(3); UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, 3); UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, 3); UnmanagedConstT3D p_cc_view(p_cc); - UnmanagedT3D p_coeff_view(p_coeff); + UnmanagedT4D p_coeff_view(p_coeff); - UnmanagedConstT3D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, + UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); @@ -180,22 +190,29 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, // Wrap raw pointers in unmanaged Kokkos Views. 
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T *, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT1D; typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; UnmanagedT3D z_d(lsq_high_set_dim_c, nproma, elev); - UnmanagedT3D z_qt_times_d(5); + UnmanagedT1D z_qt_times_d(5); UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstT3D p_cc_view(p_cc); - UnmanagedT3D p_coeff_view(p_coeff); + UnmanagedT4D p_coeff_view(p_coeff); - UnmanagedConstT3D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, + UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); UnmanagedConstT3D ptr_rrdiag(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, nblks_c); UnmanagedConstT3D ptr_rutri(lsq_rmat_utri_c, nproma, @@ -208,8 +225,8 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, {0, i_startidx_in, slev, i_startblk}, {6, i_endidx_in, elev, i_endblk}); Kokkos::parallel_for( "recon_lsq_cell_q_init", initPolicy, - KOKKOS_LAMBDA(const int z, const int jc, const int jk, const int jb) { - p_coeff_view(z, jc, jk, jb) = 0; + KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) { + p_coeff_view(ji, jc, jk, jb) = 0; }); } @@ -326,3 +343,571 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::fence(); } + +template <typename T> +void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, + const int *lsq_blk_c, const T *lsq_pseudoinv, + const T *lsq_moments, T *p_coeff, int i_startblk, + int 
i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int patch_id, + int lsq_high_set_dim_c, bool l_limited_area, + bool lacc, int nblks_c, int lsq_dim_unk, + int lsq_dim_c) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedT3D z_b(lsq_high_set_dim_c, nproma, elev); + + UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); + UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); + + UnmanagedConstT3D p_cc_view(p_cc); + UnmanagedT4D p_coeff_view(p_coeff); + + UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, + lsq_dim_c, nblks_c); + UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); + + if (patch_id > 1 || l_limited_area) { + Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( + {0, i_startidx_in, slev, i_startblk}, {6, i_endidx_in, elev, i_endblk}); + Kokkos::parallel_for( + "recon_lsq_cell_q_svd_init", initPolicy, + KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) { + p_coeff_view(ji, jc, jk, jb) = 0; + }); + } + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "recon_lsq_cell_q_svd_step1", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_b(0, jc, jk) = p_cc_view(iidx(jc, jb, 
0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); + z_b(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_b(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + z_b(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + p_cc_view(jc, jk, jb); + z_b(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - + p_cc_view(jc, jk, jb); + z_b(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - + p_cc_view(jc, jk, jb); + z_b(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - + p_cc_view(jc, jk, jb); + z_b(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - + p_cc_view(jc, jk, jb); + z_b(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - + p_cc_view(jc, jk, jb); + }); + Kokkos::parallel_for( + "recon_lsq_cell_q_svd_step2", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + p_coeff_view(5, jc, jk, jb) = + lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8, jc, jk); + p_coeff_view(4, jc, jk, jb) = + lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 3, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8, jc, jk); + p_coeff_view(3, jc, jk, jb) = + 
lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 2, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8, jc, jk); + p_coeff_view(2, jc, jk, jb) = + lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 1, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 1, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8, jc, jk); + p_coeff_view(1, jc, jk, jb) = + lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8, jc, jk); + p_coeff_view(0, jc, jk, jb) = + p_cc_view(jc, jk, jb) - + p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - + p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) - + p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) - + p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4); + }); + } + + Kokkos::fence(); +} + +template <typename T> +void recon_lsq_cell_c(const T *p_cc, const int 
*lsq_idx_c, const int *lsq_blk_c, + const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, + const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + int patch_id, int lsq_high_set_dim_c, bool l_limited_area, + bool lacc, int nblks_c, int lsq_dim_unk, int lsq_dim_c) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T *, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT1D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedT3D z_d(lsq_high_set_dim_c, nproma, elev); + UnmanagedT1D z_qt_times_d(9); + + UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); + UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); + + UnmanagedConstT3D p_cc_view(p_cc); + UnmanagedT4D p_coeff_view(p_coeff); + + UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, + lsq_dim_c, nblks_c); + UnmanagedConstT3D ptr_rrdiag(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, nblks_c); + UnmanagedConstT3D ptr_rutri(lsq_rmat_utri_c, nproma, + (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, + nblks_c); + UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); + + if (patch_id > 1 || l_limited_area) { + Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( + {0, i_startidx_in, slev, i_startblk}, {9, i_endidx_in, elev, i_endblk}); + Kokkos::parallel_for( + "recon_lsq_cell_c_init", initPolicy, + KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) { + 
p_coeff_view(ji, jc, jk, jb) = 0; + }); + } + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "recon_lsq_cell_c_step1", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_d(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); + z_d(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_d(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + z_d(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + p_cc_view(jc, jk, jb); + z_d(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - + p_cc_view(jc, jk, jb); + z_d(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - + p_cc_view(jc, jk, jb); + z_d(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - + p_cc_view(jc, jk, jb); + z_d(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - + p_cc_view(jc, jk, jb); + z_d(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - + p_cc_view(jc, jk, jb); + }); + Kokkos::parallel_for( + "recon_lsq_cell_c_step2", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, 
jc, jk) + + lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(5) = lsq_qtmat_c_view(jc, 5, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 5, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 5, 2, jb) * z_d(2, jc, jk) 
+ + lsq_qtmat_c_view(jc, 5, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 5, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 5, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 5, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 5, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 5, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(6) = lsq_qtmat_c_view(jc, 6, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 6, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 6, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 6, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 6, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 6, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 6, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 6, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 6, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(7) = lsq_qtmat_c_view(jc, 7, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 7, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 7, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 7, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 7, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 7, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 7, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 7, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 7, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(8) = lsq_qtmat_c_view(jc, 8, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 8, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 8, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 8, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 8, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 8, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 8, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 8, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 8, 8, jb) * z_d(8, jc, jk); + + p_coeff_view(9, jc, jk, jb) = ptr_rrdiag(jc, 8, jb) * z_qt_times_d(8); + p_coeff_view(8, jc, jk, jb) = + ptr_rrdiag(jc, 7, jb) * + (z_qt_times_d(7) - + ptr_rutri(jc, 0, jb) * p_coeff_view(9, jc, jk, jb)); + p_coeff_view(7, jc, 
jk, jb) = + ptr_rrdiag(jc, 6, jb) * + (z_qt_times_d(6) - + (ptr_rutri(jc, 1, jb) * p_coeff_view(8, jc, jk, jb) + + ptr_rutri(jc, 2, jb) * p_coeff_view(9, jc, jk, jb))); + p_coeff_view(6, jc, jk, jb) = + ptr_rrdiag(jc, 5, jb) * + (z_qt_times_d(5) - + (ptr_rutri(jc, 3, jb) * p_coeff_view(7, jc, jk, jb) + + ptr_rutri(jc, 4, jb) * p_coeff_view(8, jc, jk, jb) + + ptr_rutri(jc, 5, jb) * p_coeff_view(9, jc, jk, jb))); + p_coeff_view(5, jc, jk, jb) = + ptr_rrdiag(jc, 4, jb) * + (z_qt_times_d(4) - + (ptr_rutri(jc, 6, jb) * p_coeff_view(6, jc, jk, jb) + + ptr_rutri(jc, 7, jb) * p_coeff_view(7, jc, jk, jb) + + ptr_rutri(jc, 8, jb) * p_coeff_view(8, jc, jk, jb) + + ptr_rutri(jc, 9, jb) * p_coeff_view(9, jc, jk, jb))); + p_coeff_view(4, jc, jk, jb) = + ptr_rrdiag(jc, 3, jb) * + (z_qt_times_d(3) - + (ptr_rutri(jc, 10, jb) * p_coeff_view(5, jc, jk, jb) + + ptr_rutri(jc, 11, jb) * p_coeff_view(6, jc, jk, jb) + + ptr_rutri(jc, 12, jb) * p_coeff_view(7, jc, jk, jb) + + ptr_rutri(jc, 13, jb) * p_coeff_view(8, jc, jk, jb) + + ptr_rutri(jc, 14, jb) * p_coeff_view(9, jc, jk, jb))); + p_coeff_view(3, jc, jk, jb) = + ptr_rrdiag(jc, 2, jb) * + (z_qt_times_d(2) - + (ptr_rutri(jc, 15, jb) * p_coeff_view(4, jc, jk, jb) + + ptr_rutri(jc, 16, jb) * p_coeff_view(5, jc, jk, jb) + + ptr_rutri(jc, 17, jb) * p_coeff_view(6, jc, jk, jb) + + ptr_rutri(jc, 18, jb) * p_coeff_view(7, jc, jk, jb) + + ptr_rutri(jc, 19, jb) * p_coeff_view(8, jc, jk, jb) + + ptr_rutri(jc, 20, jb) * p_coeff_view(9, jc, jk, jb))); + p_coeff_view(2, jc, jk, jb) = + ptr_rrdiag(jc, 1, jb) * + (z_qt_times_d(1) - + (ptr_rutri(jc, 21, jb) * p_coeff_view(3, jc, jk, jb) + + ptr_rutri(jc, 22, jb) * p_coeff_view(4, jc, jk, jb) + + ptr_rutri(jc, 23, jb) * p_coeff_view(5, jc, jk, jb) + + ptr_rutri(jc, 24, jb) * p_coeff_view(6, jc, jk, jb) + + ptr_rutri(jc, 25, jb) * p_coeff_view(7, jc, jk, jb) + + ptr_rutri(jc, 26, jb) * p_coeff_view(8, jc, jk, jb) + + ptr_rutri(jc, 27, jb) * p_coeff_view(9, jc, jk, jb))); + p_coeff_view(1, jc, jk, jb) = 
+ ptr_rrdiag(jc, 0, jb) * + (z_qt_times_d(0) - + (ptr_rutri(jc, 28, jb) * p_coeff_view(2, jc, jk, jb) + + ptr_rutri(jc, 29, jb) * p_coeff_view(3, jc, jk, jb) + + ptr_rutri(jc, 30, jb) * p_coeff_view(4, jc, jk, jb) + + ptr_rutri(jc, 31, jb) * p_coeff_view(5, jc, jk, jb) + + ptr_rutri(jc, 32, jb) * p_coeff_view(6, jc, jk, jb) + + ptr_rutri(jc, 33, jb) * p_coeff_view(7, jc, jk, jb) + + ptr_rutri(jc, 34, jb) * p_coeff_view(8, jc, jk, jb) + + ptr_rutri(jc, 35, jb) * p_coeff_view(9, jc, jk, jb))); + p_coeff_view(0, jc, jk, jb) = + p_cc(jc, jk, jb) - + (p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) + + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) + + p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) + + p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) + + p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4) + + p_coeff_view(6, jc, jk, jb) * lsq_moments_view(jc, jb, 5) + + p_coeff_view(7, jc, jk, jb) * lsq_moments_view(jc, jb, 6) + + p_coeff_view(8, jc, jk, jb) * lsq_moments_view(jc, jb, 7) + + p_coeff_view(9, jc, jk, jb) * lsq_moments_view(jc, jb, 8)); + }); + } + + Kokkos::fence(); +} + +template <typename T> +void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, + const int *lsq_blk_c, const T *lsq_pseudoinv, + const T *lsq_moments, T *p_coeff, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int patch_id, + int lsq_high_set_dim_c, bool l_limited_area, + bool lacc, int nblks_c, int lsq_dim_unk, + int lsq_dim_c) { + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T *, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT1D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedT1D z_b(9); + + UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); + UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); + + UnmanagedConstT3D p_cc_view(p_cc); + UnmanagedT4D p_coeff_view(p_coeff); + + UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, + lsq_dim_c, nblks_c); + UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); + + if (patch_id > 1 || l_limited_area) { + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<3>> initPolicy({slev, i_startidx, 0}, + {elev, i_endidx, 9}); + Kokkos::parallel_for( + "recon_lsq_cell_c_svd_init", initPolicy, + KOKKOS_LAMBDA(const int jk, const int jc, const int ji) { + p_coeff_view(ji, jc, jk, jb) = 0; + }); + } + } + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "recon_lsq_cell_c_svd_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_b(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); + z_b(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, 
jk, jb); + z_b(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + z_b(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + p_cc_view(jc, jk, jb); + z_b(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - + p_cc_view(jc, jk, jb); + z_b(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - + p_cc_view(jc, jk, jb); + z_b(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - + p_cc_view(jc, jk, jb); + z_b(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - + p_cc_view(jc, jk, jb); + z_b(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - + p_cc_view(jc, jk, jb); + + p_coeff_view(9, jc, jk, jb) = + lsq_pseudoinv_view(jc, 8, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 8, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 8, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 8, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 8, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 8, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 8, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 8, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 8, 8, jb) * z_b(8, jc, jk); + p_coeff_view(8, jc, jk, jb) = + lsq_pseudoinv_view(jc, 7, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 7, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 7, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 7, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 7, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 7, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 7, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 7, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 7, 8, jb) * z_b(8, jc, jk); + p_coeff_view(7, jc, jk, jb) = + lsq_pseudoinv_view(jc, 6, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 6, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 6, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 6, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 6, 4, jb) * z_b(4, jc, 
jk) + + lsq_pseudoinv_view(jc, 6, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 6, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 6, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 6, 8, jb) * z_b(8, jc, jk); + p_coeff_view(6, jc, jk, jb) = + lsq_pseudoinv_view(jc, 5, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 5, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 5, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 5, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 5, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 5, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 5, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 5, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 5, 8, jb) * z_b(8, jc, jk); + p_coeff_view(5, jc, jk, jb) = + lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8, jc, jk); + p_coeff_view(4, jc, jk, jb) = + lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 3, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8, jc, jk); + p_coeff_view(3, jc, jk, jb) = + lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 2, 3, 
jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 2, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8, jc, jk); + p_coeff_view(2, jc, jk, jb) = + lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 1, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 1, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8, jc, jk); + p_coeff_view(1, jc, jk, jb) = + lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8, jc, jk); + p_coeff_view(0, jc, jk, jb) = + p_cc_view(jc, jk, jb) - + p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - + p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) - + p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) - + p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4) - + p_coeff_view(6, jc, jk, jb) * lsq_moments_view(jc, jb, 5) - + p_coeff_view(7, jc, jk, jb) * lsq_moments_view(jc, jb, 6) - + p_coeff_view(8, jc, jk, jb) * lsq_moments_view(jc, jb, 7) - + p_coeff_view(9, jc, jk, jb) * lsq_moments_view(jc, jb, 8); + }); + } +} diff --git a/src/horizontal/lib_divrot.hpp 
b/src/horizontal/lib_divrot.hpp index 6977e5d..c32ee12 100644 --- a/src/horizontal/lib_divrot.hpp +++ b/src/horizontal/lib_divrot.hpp @@ -40,3 +40,32 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, bool lacc, int nblks_c, int lsq_dim_unk, int lsq_dim_c); + +template <typename T> +void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, + const int *lsq_blk_c, const T *lsq_pseudoinv, + const T *lsq_moments, T *p_coeff, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int patch_id, + int lsq_high_set_dim_c, bool l_limited_area, + bool lacc, int nblks_c, int lsq_dim_unk, + int lsq_dim_c); + +template <typename T> +void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, + const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + int patch_id, int lsq_high_set_dim_c, bool l_limited_area, + bool lacc, int nblks_c, int lsq_dim_unk, int lsq_dim_c); + +template <typename T> +void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, + const int *lsq_blk_c, const T *lsq_pseudoinv, + const T *lsq_moments, T *p_coeff, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int patch_id, + int lsq_high_set_dim_c, bool l_limited_area, + bool lacc, int nblks_c, int lsq_dim_unk, + int lsq_dim_c); -- GitLab From 71e12c99c0df8d3b8a4ad0df89a8622c6c1e3a01 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Wed, 26 Feb 2025 11:03:35 +0100 Subject: [PATCH 45/76] Add cpp implementations (untested) --- src/horizontal/lib_divrot.cpp | 442 +++++++++++++++++++++++++++++++++- src/horizontal/lib_divrot.hpp | 52 +++- 2 files changed, 486 
insertions(+), 8 deletions(-) diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp index 5b51d98..3586b03 100644 --- a/src/horizontal/lib_divrot.cpp +++ b/src/horizontal/lib_divrot.cpp @@ -182,7 +182,7 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, template <typename T> void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, + const T *lsq_moments, const T *lsq_qtmat_c, T &p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -347,7 +347,7 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, template <typename T> void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_pseudoinv, - const T *lsq_moments, T *p_coeff, int i_startblk, + const T *lsq_moments, T &p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -487,7 +487,7 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, template <typename T> void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, + const T *lsq_moments, const T *lsq_qtmat_c, T &p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -730,7 +730,7 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, template <typename T> void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_pseudoinv, - const T *lsq_moments, T *p_coeff, int i_startblk, 
+ const T *lsq_moments, T &p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -910,4 +910,438 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, p_coeff_view(9, jc, jk, jb) * lsq_moments_view(jc, jb, 8); }); } + + Kokkos::fence(); +} + +template <typename T> +void div3d(const T *vec_e, const int *cell_edge_idx, const int *cell_edge_blk, + const T *geofac_div, T *div_vec_c, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, + bool lacc, int nlev, int nblks_c, int nblks_e) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); + + UnmanagedConstInt3D iidx(cell_edge_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D iblk(cell_edge_blk, nproma, nblks_c, 3); + + UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c); + UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div3d_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { + div_vec_c_view(jc, jk, jb) = + vec_e_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) * + geofac_div_view(jc, 0, jb) + + vec_e_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) * + geofac_div_view(jc, 1, jb) + + vec_e_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) * + geofac_div_view(jc, 2, 
jb); + }); + } +} + +template <typename T> +void div3d_2field(const T *vec_e, const int *cell_edge_idx, + const int *cell_edge_blk, const T *geofac_div, T &div_vec_c, + const T *in2, T &out2, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, + int nproma, bool lacc, int nlev, int nblks_c, int nblks_e) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); + + UnmanagedConstInt3D iidx(cell_edge_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D iblk(cell_edge_blk, nproma, nblks_c, 3); + + UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c); + UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c); + + UnmanagedConstT3D in2_view(in2, nproma, nlev, nblks_e); + UnmanagedT3D out2_view(out2, nproma, nlev, nblks_c); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div3d_2field_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + div_vec_c_view(jc, jk, jb) = + vec_e_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) * + geofac_div_view(jc, 0, jb) + + vec_e_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) * + geofac_div_view(jc, 1, jb) + + vec_e_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) * + geofac_div_view(jc, 2, jb); + + out2_view(jc, jk, jb) = + in2_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) * + geofac_div_view(jc, 0, jb) + + in2_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) * + geofac_div_view(jc, 1, jb) 
+ + in2_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) * + geofac_div_view(jc, 2, jb); + }); + } +} + +template <typename T> +void div4d(const int *cell_edge_idx, const int *cell_edge_blk, + const T *geofac_div, const T *f4din, T &f4dout, int dim4d, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + const int *slev, const int *elev, int nproma, bool lacc, int nlev, + int nblks_c, int nblks_e) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstInt3D iidx(cell_edge_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D iblk(cell_edge_blk, nproma, nblks_c, 3); + + UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c); + + UnmanagedConstT4D f4din_view(f4din, nproma, nlev, nblks_e, dim4d); + UnmanagedT4D f4dout_view(f4dout, nproma, nlev, nblks_c, dim4d); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + for (int ji = 0; ji < dim4d; ++ji) { + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev[ji], i_startidx}, + {elev[ji], i_endidx}); + Kokkos::parallel_for( + "div4d_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + f4dout_view(jc, jk, jb, ji) = + f4din_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0), ji) * + geofac_div_view(jc, 0, jb) + + f4din_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1), ji) * + geofac_div_view(jc, 1, jb) + + f4din_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2), ji) * + 
geofac_div_view(jc, 2, jb); + }); + } + } +} + +template <typename T> +void div_avg(const T *vec_e, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const int *cell_edge_idx, + const int *cell_edge_blk, const T *geofac_div, const T *avg_coeff, + T &div_vec_c, const T *opt_in2, T &opt_out2, + const int *i_startblk_in, const int *i_endblk_in, + const int *i_startidx_in, const int *i_endidx_in, int slev, + int elev, int nproma, int patch_id, bool l_limited_area, + bool l2fields, bool lacc, int nlev, int nblks_c, int nblks_e) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); + + UnmanagedConstInt3D inidx(cell_neighbor_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D inblk(cell_neighbor_blk, nproma, nblks_c, 3); + UnmanagedConstInt3D ieidx(cell_edge_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D ieblk(cell_edge_blk, nproma, nblks_c, 3); + + UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 4, nblks_e); + UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma, nlev, nblks_c); + + UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c); + + UnmanagedConstT3D opt_in2_view(opt_in2, nproma, nlev, nblks_e); + UnmanagedT3D opt_out2_view(opt_out2, nproma, nlev, nblks_c); + + UnmanagedT3D aux_c(nproma, nlev, nblks_c); + UnmanagedT3D aux_c2(nproma, nlev, nblks_c); + + int i_startblk = i_startblk_in[0]; + int i_endblk = i_endblk_in[0]; + + if (l2fields) { + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> 
innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step1", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + aux_c(jc, jk, jb) = + vec_e_view(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) * + geofac_div_view(jc, 0, jb) + + vec_e_view(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) * + geofac_div_view(jc, 1, jb) + + vec_e_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) * + geofac_div_view(jc, 2, jb); + aux_c2(jc, jk, jb) = + opt_in2(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) * + geofac_div_view(jc, 0, jb) + + opt_in2(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) * + geofac_div_view(jc, 1, jb) + + opt_in2(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) * + geofac_div_view(jc, 2, jb); + }); + } + } else { + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step2", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + aux_c(jc, jk, jb) = + vec_e_view(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) * + geofac_div_view(jc, 0, jb) + + vec_e_view(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) * + geofac_div_view(jc, 1, jb) + + vec_e_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) * + geofac_div_view(jc, 2, jb); + }); + } + } + + if (patch_id > 1 || l_limited_area) { + i_startblk = i_startblk_in[1]; + i_endblk = i_endblk_in[1]; + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step3", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + div_vec_c_view(jc, jk, jb) = aux_c(jc, jk, jb); + }); + } + + if (l2fields) { + for (int jb 
= i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step4", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + opt_out2_view(jc, jk, jb) = aux_c2(jc, jk, jb); + }); + } + } + } + + i_startblk = i_startblk_in[2]; + i_endblk = i_endblk_in[2]; + + if (l2fields) { + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step5", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + div_vec_c_view(jc, jk, jb) = + aux_c(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + + aux_c(inidx(jc, jb, 0), jk, inblk(jc, jb, 0)) * + avg_coeff_view(jc, 1, jb) + + aux_c(inidx(jc, jb, 1), jk, inblk(jc, jb, 1)) * + avg_coeff_view(jc, 2, jb) + + aux_c(inidx(jc, jb, 2), jk, inblk(jc, jb, 2)) * + avg_coeff_view(jc, 3, jb); + opt_out2_view(jc, jk, jb) = + aux_c2(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + + aux_c2(inidx(jc, jb, 0), jk, inblk(jc, jb, 0)) * + avg_coeff_view(jc, 1, jb) + + aux_c2(inidx(jc, jb, 1), jk, inblk(jc, jb, 1)) * + avg_coeff_view(jc, 2, jb) + + aux_c2(inidx(jc, jb, 2), jk, inblk(jc, jb, 2)) * + avg_coeff_view(jc, 3, jb); + }); + } + } else { + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step6", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + div_vec_c_view(jc, jk, 
jb) = + aux_c(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + + aux_c(inidx(jc, jb, 0), jk, inblk(jc, jb, 0)) * + avg_coeff_view(jc, 1, jb) + + aux_c(inidx(jc, jb, 1), jk, inblk(jc, jb, 1)) * + avg_coeff_view(jc, 2, jb) + + aux_c(inidx(jc, jb, 2), jk, inblk(jc, jb, 2)) * + avg_coeff_view(jc, 3, jb); + }); + } + } +} + +template <typename T> +void rot_vertex_atmos(const T *vec_e, const int *vert_edge_idx, + const int *vert_edge_blk, const T *geofac_rot, T &rot_vec, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + bool lacc, int nlev, int nblks_e, int nblks_v) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); + + UnmanagedConstInt3D iidx(vert_edge_idx, nproma, nblks_v, 6); + UnmanagedConstInt3D iblk(vert_edge_blk, nproma, nblks_v, 6); + + UnmanagedConstT3D geofac_rot_view(geofac_rot, nproma, 6, nblks_v); + + UnmanagedT3D rot_vec_view(rot_vec, nproma, nlev, nblks_v); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "rot_vertex_atmos_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jv) { + rot_vec_view(jv, jk, jb) = + vec_e_view(iidx(jv, jb, 0), jk, iblk(jv, jb, 0)) * + geofac_rot_view(jv, 0, jb) + + vec_e_view(iidx(jv, jb, 1), jk, iblk(jv, jb, 1)) * + geofac_rot_view(jv, 1, jb) + + vec_e_view(iidx(jv, jb, 2), jk, iblk(jv, jb, 2)) * + geofac_rot_view(jv, 2, jb) + + vec_e_view(iidx(jv, jb, 3), 
jk, iblk(jv, jb, 3)) * + geofac_rot_view(jv, 3, jb) + + vec_e_view(iidx(jv, jb, 4), jk, iblk(jv, jb, 4)) * + geofac_rot_view(jv, 4, jb) + + vec_e_view(iidx(jv, jb, 5), jk, iblk(jv, jb, 5)) * + geofac_rot_view(jv, 5, jb); + }); + } +} + +template <typename T> +void rot_vertex_ri(const T *vec_e, const int *vert_edge_idx, + const int *vert_edge_blk, const T *geofac_rot, T &rot_vec, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, bool lacc, + bool acc_async, int nlev, int nblks_e, int nblks_v) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); + + UnmanagedConstInt3D iidx(vert_edge_idx, nproma, nblks_v, 6); + UnmanagedConstInt3D iblk(vert_edge_blk, nproma, nblks_v, 6); + + UnmanagedConstT3D geofac_rot_view(geofac_rot, nproma, 6, nblks_v); + + UnmanagedT3D rot_vec_view(rot_vec, nproma, nlev, nblks_v); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "rot_vertex_atmos_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jv) { + rot_vec_view(jv, jk, jb) = + vec_e_view(iidx(jv, jb, 0), jk, iblk(jv, jb, 0)) * + geofac_rot_view(jv, 0, jb) + + vec_e_view(iidx(jv, jb, 1), jk, iblk(jv, jb, 1)) * + geofac_rot_view(jv, 1, jb) + + vec_e_view(iidx(jv, jb, 2), jk, iblk(jv, jb, 2)) * + geofac_rot_view(jv, 2, jb) + + vec_e_view(iidx(jv, jb, 3), jk, iblk(jv, jb, 3)) * + geofac_rot_view(jv, 3, jb) + + 
vec_e_view(iidx(jv, jb, 4), jk, iblk(jv, jb, 4)) * + geofac_rot_view(jv, 4, jb) + + vec_e_view(iidx(jv, jb, 5), jk, iblk(jv, jb, 5)) * + geofac_rot_view(jv, 5, jb); + }); + } + + if (!acc_async) + Kokkos::fence(); } diff --git a/src/horizontal/lib_divrot.hpp b/src/horizontal/lib_divrot.hpp index c32ee12..36ed138 100644 --- a/src/horizontal/lib_divrot.hpp +++ b/src/horizontal/lib_divrot.hpp @@ -35,7 +35,7 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, template <typename T> void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, + const T *lsq_moments, const T *lsq_qtmat_c, T &p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -44,7 +44,7 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, template <typename T> void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_pseudoinv, - const T *lsq_moments, T *p_coeff, int i_startblk, + const T *lsq_moments, T &p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -54,7 +54,7 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, template <typename T> void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, + const T *lsq_moments, const T *lsq_qtmat_c, T &p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -63,9 +63,53 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, 
template <typename T> void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_pseudoinv, - const T *lsq_moments, T *p_coeff, int i_startblk, + const T *lsq_moments, T &p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, bool lacc, int nblks_c, int lsq_dim_unk, int lsq_dim_c); + +template <typename T> +void div3d(const T *vec_e, const int *cell_edge_idx, const int *cell_edge_blk, + const T *geofac_div, T &div_vec_c, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, + bool lacc, int nlev, int nblks_c, int nblks_e); + +template <typename T> +void div3d_2field(const T *vec_e, const int *cell_edge_idx, + const int *cell_edge_blk, const T *geofac_div, T &div_vec_c, + const T *in2, T &out2, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, + int nproma, bool lacc, int nlev, int nblks_c, int nblks_e); + +template <typename T> +void div4d(const int *cell_edge_idx, const int *cell_edge_blk, + const T *geofac_div, const T *f4din, T &f4dout, int dim4d, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + const int *slev, const int *elev, int nproma, bool lacc, int nlev, + int nblks_c, int nblks_e); + +template <typename T> +void div_avg(const T *vec_e, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const int *cell_edge_idx, + const int *cell_edge_blk, const T *geofac_div, const T *avg_coeff, + T &div_vec_c, const T *opt_in2, T &opt_out2, + const int *i_startblk_in, const int *i_endblk_in, + const int *i_startidx_in, const int *i_endidx_in, int slev, + int elev, int nproma, int patch_id, bool l_limited_area, + bool l2fields, bool lacc, int nlev, int nblks_c, int nblks_e); + +template <typename T> +void rot_vertex_atmos(const T *vec_e, const int *vert_edge_idx, + const int *vert_edge_blk, const T 
*geofac_rot, T &rot_vec, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + bool lacc, int nlev, int nblks_e, int nblks_v); + +template <typename T> +void rot_vertex_ri(const T *vec_e, const int *vert_edge_idx, + const int *vert_edge_blk, const T *geofac_rot, T &rot_vec, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, bool lacc, + bool acc_async, int nlev, int nblks_e, int nblks_v); -- GitLab From 73767f96c6109713285ae4d408e9406efdbfb1a6 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Thu, 27 Feb 2025 14:41:29 +0100 Subject: [PATCH 46/76] Fix bug and add first test --- src/horizontal/lib_divrot.cpp | 76 +++++++++----- src/horizontal/lib_divrot.hpp | 16 +-- test/c/CMakeLists.txt | 2 +- test/c/test_horizontal_divrot.cpp | 159 ++++++++++++++++++++++++++++++ 4 files changed, 218 insertions(+), 35 deletions(-) diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp index 3586b03..c615a42 100644 --- a/src/horizontal/lib_divrot.cpp +++ b/src/horizontal/lib_divrot.cpp @@ -9,19 +9,20 @@ // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- +#include <iostream> #include <lib_divrot.hpp> #include <support/mo_lib_loopindices.hpp> #include <vector> template <typename T> -void recon_lsq_cell_l_(const T *p_cc, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, const T *lsq_qtmat_c, - const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, T &p_coeff, int i_startblk, - int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, bool l_consv, bool lacc, - bool acc_async, int nblks_c, int lsq_dim_unk, - int lsq_dim_c) { +void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const T *lsq_qtmat_c, + const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, + const T *lsq_moments, T 
*p_coeff, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, bool l_consv, bool lacc, + bool acc_async, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c) { // Wrap raw pointers in unmanaged Kokkos Views. typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; @@ -36,14 +37,14 @@ void recon_lsq_cell_l_(const T *p_cc, const int *cell_neighbor_idx, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - UnmanagedT1D z_d(3); - UnmanagedT1D z_qt_times_d(2); + Kokkos::View<T *> z_d("z_d", 3); + Kokkos::View<T *> z_qt_times_d("z_qt_times_d", 2); UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, 3); UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, 3); - UnmanagedConstT3D p_cc_view(p_cc); - UnmanagedT4D p_coeff_view(p_coeff); + UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); @@ -102,13 +103,32 @@ void recon_lsq_cell_l_(const T *p_cc, const int *cell_neighbor_idx, Kokkos::fence(); } +template void +recon_lsq_cell_l<float>(const float *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const float *lsq_qtmat_c, + const float *lsq_rmat_rdiag_c, + const float *lsq_rmat_utri_c, const float *lsq_moments, + float *p_coeff, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, + int nproma, bool l_consv, bool lacc, bool acc_async, + int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c); + +template void recon_lsq_cell_l<double>( + const double *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const double *lsq_qtmat_c, + const double *lsq_rmat_rdiag_c, const double *lsq_rmat_utri_c, + const double *lsq_moments, double *p_coeff, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, + 
bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, + int lsq_dim_unk, int lsq_dim_c); + template <typename T> void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, const int *cell_neighbor_blk, const T *lsq_pseudoinv, const T *lsq_moments, T &p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, bool l_consv, - bool lacc, bool acc_async, int nblks_c, + bool lacc, bool acc_async, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) { // Wrap raw pointers in unmanaged Kokkos Views. typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> @@ -129,8 +149,8 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, 3); UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, 3); - UnmanagedConstT3D p_cc_view(p_cc); - UnmanagedT4D p_coeff_view(p_coeff); + UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); @@ -186,7 +206,8 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int lsq_dim_unk, int lsq_dim_c) { + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c) { // Wrap raw pointers in unmanaged Kokkos Views. 
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; @@ -209,8 +230,8 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); - UnmanagedConstT3D p_cc_view(p_cc); - UnmanagedT4D p_coeff_view(p_coeff); + UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); @@ -351,7 +372,7 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int lsq_dim_unk, + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) { // Wrap raw pointers in unmanaged Kokkos Views. 
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> @@ -372,8 +393,8 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); - UnmanagedConstT3D p_cc_view(p_cc); - UnmanagedT4D p_coeff_view(p_coeff); + UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); @@ -491,7 +512,8 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int lsq_dim_unk, int lsq_dim_c) { + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c) { // Wrap raw pointers in unmanaged Kokkos Views. 
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; @@ -514,8 +536,8 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); - UnmanagedConstT3D p_cc_view(p_cc); - UnmanagedT4D p_coeff_view(p_coeff); + UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); @@ -734,7 +756,7 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int lsq_dim_unk, + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) { // Wrap raw pointers in unmanaged Kokkos Views. 
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> @@ -755,8 +777,8 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); - UnmanagedConstT3D p_cc_view(p_cc); - UnmanagedT4D p_coeff_view(p_coeff); + UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); diff --git a/src/horizontal/lib_divrot.hpp b/src/horizontal/lib_divrot.hpp index 36ed138..db60b29 100644 --- a/src/horizontal/lib_divrot.hpp +++ b/src/horizontal/lib_divrot.hpp @@ -17,10 +17,10 @@ template <typename T> void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, const int *cell_neighbor_blk, const T *lsq_qtmat_c, const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, T &p_coeff, int i_startblk, + const T *lsq_moments, T *p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, bool l_consv, bool lacc, - bool acc_async, int nblks_c, int lsq_dim_unk, + bool acc_async, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c); template <typename T> @@ -29,7 +29,7 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, const T *lsq_moments, T &p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, bool l_consv, - bool lacc, bool acc_async, int nblks_c, + bool lacc, bool acc_async, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c); template <typename T> @@ -39,7 +39,8 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int 
nblks_c, int lsq_dim_unk, int lsq_dim_c); + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c); template <typename T> void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, @@ -48,7 +49,7 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int lsq_dim_unk, + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c); template <typename T> @@ -58,7 +59,8 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int lsq_dim_unk, int lsq_dim_c); + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c); template <typename T> void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, @@ -67,7 +69,7 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int lsq_dim_unk, + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c); template <typename T> diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index 90ab1e3..927e4e1 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -36,7 +36,7 @@ set(SOURCES test_horizontal_recon.cpp test_horizontal_rot.cpp test_tdma_solver.cpp - test_interpolation_vector.cpp + # test_interpolation_vector.cpp test_intp_rbf.cpp test_interpolation_scalar.cpp ) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index e69de29..1915fa4 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -0,0 +1,159 @@ +// ICON +// +// 
--------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include <Kokkos_Core.hpp> +#include <gtest/gtest.h> +#include <horizontal/lib_divrot.hpp> +#include <vector> + +// Template helpers for combining multiple dimension array sizes +template <typename... Ts> size_t dim_combine(Ts... dims) { return 0; } +template <typename T> size_t dim_combine(T dim) { + return static_cast<size_t>(dim); +} +template <typename T, typename... Ts> size_t dim_combine(T dim, Ts... dims) { + return static_cast<size_t>(dim) * dim_combine(dims...); +} + +// Enum class for the reconstruction method +enum class ReconstructionMethod { + linear, + quadratic, + cubic, +}; + +// Template function for LayoutLeft ID access in compile time +template <class T, auto> using always_t = T; +template <int... Dims> int At_impl(always_t<int, Dims>... ids) { return 0; } +template <int LastDim> int At_impl(int prefix, int id) { return id * prefix; } +template <int FirstDim, int... Dims> +constexpr int At_impl(int prefix, int id, always_t<int, Dims>... ids) { + return id * prefix + At_impl<Dims...>(prefix * FirstDim, ids...); +} +template <int FirstDim, int... Dims> +// At<dim1, dim2, ...>(id1, id2, ...) gets its memory index in vector assuming +// LayoutLeft +int At(int id, always_t<int, Dims>... 
ids) { + return id + At_impl<Dims...>(FirstDim, ids...); +} + +typedef ::testing::Types<float, double> ValueTypes; + +template <typename ValueType> +class HorizontalDivrotTest : public ::testing::Test { +protected: + // [lsq_dim_c, lsq_dim_unk] + static constexpr std::tuple<int, int> + init_lsq_dim(ReconstructionMethod method) { + switch (method) { + case ReconstructionMethod::linear: + return std::make_tuple(3, 2); + case ReconstructionMethod::quadratic: + return std::make_tuple(9, 5); + case ReconstructionMethod::cubic: + return std::make_tuple(9, 9); + } + } + + // Constant dimensions. + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 1; // number of vertical levels + static constexpr int nblks_c = 1; // number of cell blocks (for p_e_in) + static constexpr std::tuple<int, int> lsq_dim = + init_lsq_dim(ReconstructionMethod::linear); + static constexpr int lsq_dim_c = std::get<0>(lsq_dim); + static constexpr int lsq_dim_unk = std::get<1>(lsq_dim); + + // Parameter values. + int i_startblk = 0; + int i_endblk = nblks_c; // Test blocks [0 .. nblks_c-1] + int i_startidx_in = 0; + int i_endidx_in = nproma; // Full range: 0 .. nproma-1 + int slev = 0; + int elev = nlev; // Full vertical range (0 .. nlev-1) + bool lacc = false; // Not using ACC-specific behavior. + bool acc_async = false; // No asynchronous execution. 
+ bool l_consv = true; // Apply the conservative correction + + std::vector<ValueType> p_cc; + std::vector<int> cell_neighbor_idx; + std::vector<int> cell_neighbor_blk; + std::vector<ValueType> lsq_qtmat_c; + std::vector<ValueType> lsq_rmat_rdiag_c; + std::vector<ValueType> lsq_rmat_utri_c; + std::vector<ValueType> lsq_moments; + std::vector<ValueType> p_coeff; + + HorizontalDivrotTest() { + p_cc.resize(dim_combine(nproma, nlev, nblks_c)); + cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, 3)); + cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, 3)); + lsq_qtmat_c.resize(dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c)); + lsq_rmat_rdiag_c.resize(dim_combine(nproma, lsq_dim_unk, nblks_c)); + lsq_rmat_utri_c.resize(dim_combine( + nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c)); + lsq_moments.resize(dim_combine(nproma, nblks_c, lsq_dim_unk)); + p_coeff.resize(dim_combine(lsq_dim_c, nproma, nlev, nblks_c)); + } +}; + +TYPED_TEST_SUITE(HorizontalDivrotTest, ValueTypes); + +TYPED_TEST(HorizontalDivrotTest, TestReconLsqCellLinear) { + this->init_lsq_dim(ReconstructionMethod::linear); + constexpr int nproma = TestFixture::nproma; + constexpr int nlev = TestFixture::nlev; + constexpr int nblks_c = TestFixture::nblks_c; + constexpr int lsq_dim_c = TestFixture::lsq_dim_c; + constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[At<nproma, nlev, nblks_c>(i, 0, 0)] = (TypeParam)(i + 1); + + this->cell_neighbor_idx[At<nproma, nblks_c, 3>(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_idx[At<nproma, nblks_c, 3>(i, 0, 1)] = i; + this->cell_neighbor_idx[At<nproma, nblks_c, 3>(i, 0, 2)] = i; + for (int j = 0; j < 3; ++j) { + this->cell_neighbor_blk[At<nproma, nblks_c, 3>(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_qtmat_c[At<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>(i, 0, j, + 0)] = 1.0; + this->lsq_qtmat_c[At<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>(i, 1, 
j, + 0)] = 0.5; + this->p_coeff[At<lsq_dim_c, nproma, nlev, nblks_c>(j, i, 0, 0)] = 0.0; + } + + this->lsq_rmat_rdiag_c[At<nproma, lsq_dim_unk, nblks_c>(i, 0, 0)] = 2.0; + this->lsq_rmat_rdiag_c[At<nproma, lsq_dim_unk, nblks_c>(i, 1, 0)] = 2.0; + this->lsq_rmat_utri_c + [At<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>( + i, 0, 0)] = 0.1; + + this->lsq_moments[At<nproma, nblks_c, lsq_dim_unk>(i, 0, 0)] = 0.2; + this->lsq_moments[At<nproma, nblks_c, lsq_dim_unk>(i, 0, 1)] = 0.3; + } + + recon_lsq_cell_l<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + EXPECT_NEAR(this->p_coeff[0], 0.34, 1e-6); + EXPECT_NEAR(this->p_coeff[1], 1.8, 1e-6); + EXPECT_NEAR(this->p_coeff[2], 1.0, 1e-6); +} -- GitLab From f2408b7d59c83679e9d7908fdc77c2d7a4f9c1ba Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Thu, 27 Feb 2025 14:42:51 +0100 Subject: [PATCH 47/76] Reverse commented file --- test/c/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index 927e4e1..90ab1e3 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -36,7 +36,7 @@ set(SOURCES test_horizontal_recon.cpp test_horizontal_rot.cpp test_tdma_solver.cpp - # test_interpolation_vector.cpp + test_interpolation_vector.cpp test_intp_rbf.cpp test_interpolation_scalar.cpp ) -- GitLab From 5fbca4150b4a89228307a19cabf87f5847edf05b Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Thu, 27 Feb 2025 15:28:43 +0100 Subject: [PATCH 48/76] Add comments for templates --- 
test/c/test_horizontal_divrot.cpp | 37 +++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 1915fa4..26ba118 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -14,11 +14,15 @@ #include <horizontal/lib_divrot.hpp> #include <vector> -// Template helpers for combining multiple dimension array sizes +// Template helpers for combining multiple dimension array sizes. +// The base function of dimension combine. Should not be used. template <typename... Ts> size_t dim_combine(Ts... dims) { return 0; } +// Overload for a single dimension; returns the dimension itself. template <typename T> size_t dim_combine(T dim) { return static_cast<size_t>(dim); } +// Overload that picks out the first dimension. The combined +// dimension is the first dimension times the combined dimension of the rest. template <typename T, typename... Ts> size_t dim_combine(T dim, Ts... dims) { return static_cast<size_t>(dim) * dim_combine(dims...); } @@ -31,16 +35,41 @@ enum class ReconstructionMethod { }; // Template function for LayoutLeft ID access in compile time +// For example, a multi-dimensional array A of dimensions <2, 3, 4, 5> gets its +// corresponding vector id (LayoutLeft) by +// At<2, 3, 4, 5>(id1, id2, id3, id4). +// At_impl then adds the ids from beginning to end and passes the id prefix +// to the next recursive At_impl function. 
In this example, +// At<2, 3, 4, 5>(id1, id2, id3, id4) { +// return id1 + At_impl<3, 4, 5>(2, id2, id3, id4); +// } +// At_impl<3, 4, 5>(2, id2, id3, id4) { +// return id2 * 2 + At_impl<4, 5>(2 * 3, id3, id4); +// } +// At_impl<4, 5>(2 * 3, id3, id4) { +// return id3 * 2 * 3 + At_impl<5>(2 * 3 * 4, id4); +// } +// At_impl<5>(2 * 3 * 4, id4) { +// return id4 * 2 * 3 * 4; +// } +// Which gives +// At<2, 3, 4, 5>(id1, id2, id3, id4) = id1 + id2 * 2 + +// id3 * 2 * 3 + id4 * 2 * 3 * 4 +// Helper alias mapping each integer template argument to the type T (int here) template <class T, auto> using always_t = T; +// Base function of At_impl. Should not be used. template <int... Dims> int At_impl(always_t<int, Dims>... ids) { return 0; } +// Overload handling the last ID template <int LastDim> int At_impl(int prefix, int id) { return id * prefix; } +// Recursive overload of At_impl: accumulates the return value using the +// first id and passes the prefix to the next recursive At_impl function. template <int FirstDim, int... Dims> -constexpr int At_impl(int prefix, int id, always_t<int, Dims>... ids) { +int At_impl(int prefix, int id, always_t<int, Dims>... ids) { return id * prefix + At_impl<Dims...>(prefix * FirstDim, ids...); } -template <int FirstDim, int... Dims> // At<dim1, dim2, ...>(id1, id2, ...) gets its memory index in vector assuming -// LayoutLeft +// LayoutLeft. Use this function instead of At_impl. +template <int FirstDim, int... Dims> int At(int id, always_t<int, Dims>... 
ids) { return id + At_impl<Dims...>(FirstDim, ids...); } -- GitLab From 698c9f913ad73324f6fc227dd4590cbbc8456eb4 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Thu, 27 Feb 2025 16:04:35 +0100 Subject: [PATCH 49/76] Make reconstruction method templated --- test/c/test_horizontal_divrot.cpp | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 26ba118..082afa3 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -74,11 +74,21 @@ int At(int id, always_t<int, Dims>... ids) { return id + At_impl<Dims...>(FirstDim, ids...); } -typedef ::testing::Types<float, double> ValueTypes; +// ValueType struct for compute precision and reconstruction method. +template <typename ValueType, int ReconMethod> struct DivrotType { + using type = ValueType; + static constexpr int get_recon_method() { return ReconMethod; }; +}; + +typedef ::testing::Types< + DivrotType<float, static_cast<int>(ReconstructionMethod::linear)>, + DivrotType<double, static_cast<int>(ReconstructionMethod::linear)>> + ValueTypes; -template <typename ValueType> +template <typename ValueTypes> class HorizontalDivrotTest : public ::testing::Test { protected: + using ValueType = typename ValueTypes::type; // [lsq_dim_c, lsq_dim_unk] static constexpr std::tuple<int, int> init_lsq_dim(ReconstructionMethod method) { @@ -96,8 +106,8 @@ protected: static constexpr int nproma = 3; // inner loop length static constexpr int nlev = 1; // number of vertical levels static constexpr int nblks_c = 1; // number of cell blocks (for p_e_in) - static constexpr std::tuple<int, int> lsq_dim = - init_lsq_dim(ReconstructionMethod::linear); + static constexpr std::tuple<int, int> lsq_dim = init_lsq_dim( + static_cast<ReconstructionMethod>(ValueTypes::get_recon_method())); static constexpr int lsq_dim_c = std::get<0>(lsq_dim); static constexpr int lsq_dim_unk = 
std::get<1>(lsq_dim); @@ -137,7 +147,8 @@ protected: TYPED_TEST_SUITE(HorizontalDivrotTest, ValueTypes); TYPED_TEST(HorizontalDivrotTest, TestReconLsqCellLinear) { - this->init_lsq_dim(ReconstructionMethod::linear); + using ValueType = typename TestFixture::ValueType; + constexpr int nproma = TestFixture::nproma; constexpr int nlev = TestFixture::nlev; constexpr int nblks_c = TestFixture::nblks_c; @@ -146,7 +157,7 @@ TYPED_TEST(HorizontalDivrotTest, TestReconLsqCellLinear) { // Initialization for (int i = 0; i < nproma; ++i) { - this->p_cc[At<nproma, nlev, nblks_c>(i, 0, 0)] = (TypeParam)(i + 1); + this->p_cc[At<nproma, nlev, nblks_c>(i, 0, 0)] = (i + 1); this->cell_neighbor_idx[At<nproma, nblks_c, 3>(i, 0, 0)] = (i + 1) % nproma; this->cell_neighbor_idx[At<nproma, nblks_c, 3>(i, 0, 1)] = i; @@ -173,7 +184,7 @@ TYPED_TEST(HorizontalDivrotTest, TestReconLsqCellLinear) { this->lsq_moments[At<nproma, nblks_c, lsq_dim_unk>(i, 0, 1)] = 0.3; } - recon_lsq_cell_l<TypeParam>( + recon_lsq_cell_l<ValueType>( this->p_cc.data(), this->cell_neighbor_idx.data(), this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), -- GitLab From 4fa4a6f49193a0a425c99562cfeb5638cf8a1f67 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Fri, 28 Feb 2025 18:55:40 +0100 Subject: [PATCH 50/76] Instantiate and fix bugs --- src/horizontal/lib_divrot.cpp | 437 +++++++++++++++++++++++----------- 1 file changed, 297 insertions(+), 140 deletions(-) diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp index c615a42..9dce2e4 100644 --- a/src/horizontal/lib_divrot.cpp +++ b/src/horizontal/lib_divrot.cpp @@ -29,8 +29,6 @@ void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; - typedef Kokkos::View<T *, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT1D; typedef Kokkos::View<T ****, 
Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT4D; typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, @@ -125,7 +123,7 @@ template void recon_lsq_cell_l<double>( template <typename T> void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, const int *cell_neighbor_blk, const T *lsq_pseudoinv, - const T *lsq_moments, T &p_coeff, int i_startblk, + const T *lsq_moments, T *p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, @@ -136,15 +134,13 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; - typedef Kokkos::View<T *, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT1D; typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT4D; typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - UnmanagedT1D z_b(3); + Kokkos::View<T *> z_b("z_b", 3); UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, 3); UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, 3); @@ -199,10 +195,26 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, Kokkos::fence(); } +template void recon_lsq_cell_l_svd<float>( + const float *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const float *lsq_pseudoinv, + const float *lsq_moments, float *p_coeff, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, + bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, + int lsq_dim_unk, int lsq_dim_c); + +template void recon_lsq_cell_l_svd<double>( + const double *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const double *lsq_pseudoinv, + const double *lsq_moments, double *p_coeff, int i_startblk, int i_endblk, + int i_startidx_in, 
int i_endidx_in, int slev, int elev, int nproma, + bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, + int lsq_dim_unk, int lsq_dim_c); + template <typename T> void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, const T *lsq_qtmat_c, T &p_coeff, + const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -214,18 +226,14 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; - typedef Kokkos::View<T *, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT1D; - typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT3D; typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT4D; typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - UnmanagedT3D z_d(lsq_high_set_dim_c, nproma, elev); - UnmanagedT1D z_qt_times_d(5); + Kokkos::View<T ***> z_d("z_d", lsq_high_set_dim_c, nproma, elev); + Kokkos::View<T *> z_qt_times_d("z_qt_times_d", 5); UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); @@ -353,7 +361,7 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, ptr_rutri(jc, 8, jb) * p_coeff_view(4, jc, jk, jb) - ptr_rutri(jc, 9, jb) * p_coeff_view(5, jc, jk, jb)); p_coeff_view(0, jc, jk, jb) = - p_cc(jc, jk, jb) - + p_cc_view(jc, jk, jb) - p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) - @@ -365,10 +373,28 @@ void 
recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::fence(); } +template void recon_lsq_cell_q<float>( + const float *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const float *lsq_rmat_rdiag_c, const float *lsq_rmat_utri_c, + const float *lsq_moments, const float *lsq_qtmat_c, float *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int patch_id, int lsq_high_set_dim_c, + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c); + +template void recon_lsq_cell_q<double>( + const double *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const double *lsq_rmat_rdiag_c, const double *lsq_rmat_utri_c, + const double *lsq_moments, const double *lsq_qtmat_c, double *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int patch_id, int lsq_high_set_dim_c, + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c); + template <typename T> void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_pseudoinv, - const T *lsq_moments, T &p_coeff, int i_startblk, + const T *lsq_moments, T *p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -380,15 +406,13 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; - typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT3D; typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT4D; typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - UnmanagedT3D z_b(lsq_high_set_dim_c, nproma, elev); + Kokkos::View<T ***> z_b("z_b", lsq_high_set_dim_c, 
nproma, elev); UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); @@ -505,10 +529,26 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, Kokkos::fence(); } +template void recon_lsq_cell_q_svd<float>( + const float *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const float *lsq_pseudoinv, const float *lsq_moments, float *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int patch_id, int lsq_high_set_dim_c, + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c); + +template void recon_lsq_cell_q_svd<double>( + const double *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const double *lsq_pseudoinv, const double *lsq_moments, double *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int patch_id, int lsq_high_set_dim_c, + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c); + template <typename T> void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, const T *lsq_qtmat_c, T &p_coeff, + const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -520,18 +560,14 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; - typedef Kokkos::View<T *, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT1D; - typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT3D; typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT4D; typedef 
Kokkos::View<const int ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - UnmanagedT3D z_d(lsq_high_set_dim_c, nproma, elev); - UnmanagedT1D z_qt_times_d(9); + Kokkos::View<T ***> z_d("z_d", lsq_high_set_dim_c, nproma, elev); + Kokkos::View<T *> z_qt_times_d("z_qt_times_d", 9); UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); @@ -733,7 +769,7 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, ptr_rutri(jc, 34, jb) * p_coeff_view(8, jc, jk, jb) + ptr_rutri(jc, 35, jb) * p_coeff_view(9, jc, jk, jb))); p_coeff_view(0, jc, jk, jb) = - p_cc(jc, jk, jb) - + p_cc_view(jc, jk, jb) - (p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) + p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) + @@ -749,10 +785,28 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::fence(); } +template void recon_lsq_cell_c<float>( + const float *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const float *lsq_rmat_rdiag_c, const float *lsq_rmat_utri_c, + const float *lsq_moments, const float *lsq_qtmat_c, float *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int patch_id, int lsq_high_set_dim_c, + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c); + +template void recon_lsq_cell_c<double>( + const double *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const double *lsq_rmat_rdiag_c, const double *lsq_rmat_utri_c, + const double *lsq_moments, const double *lsq_qtmat_c, double *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int patch_id, int lsq_high_set_dim_c, + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c); + template <typename T> void 
recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_pseudoinv, - const T *lsq_moments, T &p_coeff, int i_startblk, + const T *lsq_moments, T *p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -764,15 +818,13 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; - typedef Kokkos::View<T *, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT1D; typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT4D; typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - UnmanagedT1D z_b(9); + Kokkos::View<T *> z_b("z_b", 9); UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); @@ -810,115 +862,115 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, Kokkos::parallel_for( "recon_lsq_cell_c_svd_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - z_b(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - - p_cc_view(jc, jk, jb); - z_b(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - - p_cc_view(jc, jk, jb); - z_b(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - - p_cc_view(jc, jk, jb); - z_b(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - - p_cc_view(jc, jk, jb); - z_b(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - - p_cc_view(jc, jk, jb); - z_b(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - - p_cc_view(jc, jk, jb); - z_b(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - - p_cc_view(jc, jk, jb); - z_b(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - - p_cc_view(jc, jk, jb); - z_b(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, 
iblk(jc, jb, 8)) - - p_cc_view(jc, jk, jb); + z_b(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); + z_b(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_b(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + z_b(3) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + p_cc_view(jc, jk, jb); + z_b(4) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - + p_cc_view(jc, jk, jb); + z_b(5) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - + p_cc_view(jc, jk, jb); + z_b(6) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - + p_cc_view(jc, jk, jb); + z_b(7) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - + p_cc_view(jc, jk, jb); + z_b(8) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - + p_cc_view(jc, jk, jb); p_coeff_view(9, jc, jk, jb) = - lsq_pseudoinv_view(jc, 8, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 8, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 8, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 8, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 8, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 8, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 8, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 8, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 8, 8, jb) * z_b(8, jc, jk); + lsq_pseudoinv_view(jc, 8, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 8, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 8, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 8, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 8, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 8, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 8, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 8, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 8, 8, jb) * z_b(8); p_coeff_view(8, jc, jk, jb) = - lsq_pseudoinv_view(jc, 7, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 7, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 7, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 7, 3, jb) * z_b(3, jc, jk) + - 
lsq_pseudoinv_view(jc, 7, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 7, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 7, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 7, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 7, 8, jb) * z_b(8, jc, jk); + lsq_pseudoinv_view(jc, 7, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 7, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 7, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 7, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 7, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 7, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 7, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 7, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 7, 8, jb) * z_b(8); p_coeff_view(7, jc, jk, jb) = - lsq_pseudoinv_view(jc, 6, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 6, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 6, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 6, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 6, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 6, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 6, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 6, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 6, 8, jb) * z_b(8, jc, jk); + lsq_pseudoinv_view(jc, 6, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 6, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 6, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 6, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 6, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 6, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 6, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 6, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 6, 8, jb) * z_b(8); p_coeff_view(6, jc, jk, jb) = - lsq_pseudoinv_view(jc, 5, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 5, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 5, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 5, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 5, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 5, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 5, 6, jb) * 
z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 5, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 5, 8, jb) * z_b(8, jc, jk); + lsq_pseudoinv_view(jc, 5, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 5, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 5, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 5, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 5, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 5, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 5, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 5, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 5, 8, jb) * z_b(8); p_coeff_view(5, jc, jk, jb) = - lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8, jc, jk); + lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8); p_coeff_view(4, jc, jk, jb) = - lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 3, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8, jc, jk); + 
lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 3, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8); p_coeff_view(3, jc, jk, jb) = - lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 2, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8, jc, jk); + lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 2, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8); p_coeff_view(2, jc, jk, jb) = - lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 1, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 1, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8, jc, jk); + lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2) + + 
lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 1, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 1, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8); p_coeff_view(1, jc, jk, jb) = - lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8, jc, jk); + lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8); p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb) - p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - @@ -936,6 +988,22 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, Kokkos::fence(); } +template void recon_lsq_cell_c_svd<float>( + const float *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const float *lsq_pseudoinv, const float *lsq_moments, float *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int patch_id, int lsq_high_set_dim_c, + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c); + +template void recon_lsq_cell_c_svd<double>( + const double *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const double *lsq_pseudoinv, const double 
*lsq_moments, double *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int patch_id, int lsq_high_set_dim_c, + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c); + template <typename T> void div3d(const T *vec_e, const int *cell_edge_idx, const int *cell_edge_blk, const T *geofac_div, T *div_vec_c, int i_startblk, int i_endblk, @@ -978,10 +1046,24 @@ void div3d(const T *vec_e, const int *cell_edge_idx, const int *cell_edge_blk, } } +template void div3d<float>(const float *vec_e, const int *cell_edge_idx, + const int *cell_edge_blk, const float *geofac_div, + float *div_vec_c, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, bool lacc, int nlev, + int nblks_c, int nblks_e); + +template void div3d<double>(const double *vec_e, const int *cell_edge_idx, + const int *cell_edge_blk, const double *geofac_div, + double *div_vec_c, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, bool lacc, int nlev, + int nblks_c, int nblks_e); + template <typename T> void div3d_2field(const T *vec_e, const int *cell_edge_idx, - const int *cell_edge_blk, const T *geofac_div, T &div_vec_c, - const T *in2, T &out2, int i_startblk, int i_endblk, + const int *cell_edge_blk, const T *geofac_div, T *div_vec_c, + const T *in2, T *out2, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, bool lacc, int nlev, int nblks_c, int nblks_e) { // Wrap raw pointers in unmanaged Kokkos Views. 
@@ -1033,9 +1115,23 @@ void div3d_2field(const T *vec_e, const int *cell_edge_idx, } } +template void div3d_2field<float>( + const float *vec_e, const int *cell_edge_idx, const int *cell_edge_blk, + const float *geofac_div, float *div_vec_c, const float *in2, float *out2, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, bool lacc, int nlev, int nblks_c, int nblks_e); + +template void +div3d_2field<double>(const double *vec_e, const int *cell_edge_idx, + const int *cell_edge_blk, const double *geofac_div, + double *div_vec_c, const double *in2, double *out2, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, bool lacc, + int nlev, int nblks_c, int nblks_e); + template <typename T> void div4d(const int *cell_edge_idx, const int *cell_edge_blk, - const T *geofac_div, const T *f4din, T &f4dout, int dim4d, + const T *geofac_div, const T *f4din, T *f4dout, int dim4d, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, const int *slev, const int *elev, int nproma, bool lacc, int nlev, int nblks_c, int nblks_e) { @@ -1084,11 +1180,25 @@ void div4d(const int *cell_edge_idx, const int *cell_edge_blk, } } +template void div4d<float>(const int *cell_edge_idx, const int *cell_edge_blk, + const float *geofac_div, const float *f4din, + float *f4dout, int dim4d, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + const int *slev, const int *elev, int nproma, + bool lacc, int nlev, int nblks_c, int nblks_e); + +template void div4d<double>(const int *cell_edge_idx, const int *cell_edge_blk, + const double *geofac_div, const double *f4din, + double *f4dout, int dim4d, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + const int *slev, const int *elev, int nproma, + bool lacc, int nlev, int nblks_c, int nblks_e); + template <typename T> void div_avg(const T *vec_e, const int *cell_neighbor_idx, const int *cell_neighbor_blk, const 
int *cell_edge_idx, const int *cell_edge_blk, const T *geofac_div, const T *avg_coeff, - T &div_vec_c, const T *opt_in2, T &opt_out2, + T *div_vec_c, const T *opt_in2, T *opt_out2, const int *i_startblk_in, const int *i_endblk_in, const int *i_startidx_in, const int *i_endidx_in, int slev, int elev, int nproma, int patch_id, bool l_limited_area, @@ -1117,8 +1227,8 @@ void div_avg(const T *vec_e, const int *cell_neighbor_idx, UnmanagedConstT3D opt_in2_view(opt_in2, nproma, nlev, nblks_e); UnmanagedT3D opt_out2_view(opt_out2, nproma, nlev, nblks_c); - UnmanagedT3D aux_c(nproma, nlev, nblks_c); - UnmanagedT3D aux_c2(nproma, nlev, nblks_c); + Kokkos::View<T ***> aux_c("aux_c", nproma, nlev, nblks_c); + Kokkos::View<T ***> aux_c2("aux_c2", nproma, nlev, nblks_c); int i_startblk = i_startblk_in[0]; int i_endblk = i_endblk_in[0]; @@ -1142,11 +1252,11 @@ void div_avg(const T *vec_e, const int *cell_neighbor_idx, vec_e_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) * geofac_div_view(jc, 2, jb); aux_c2(jc, jk, jb) = - opt_in2(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) * + opt_in2_view(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) * geofac_div_view(jc, 0, jb) + - opt_in2(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) * + opt_in2_view(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) * geofac_div_view(jc, 1, jb) + - opt_in2(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) * + opt_in2_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) * geofac_div_view(jc, 2, jb); }); } @@ -1263,9 +1373,32 @@ void div_avg(const T *vec_e, const int *cell_neighbor_idx, } } +template void div_avg<float>(const float *vec_e, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, + const int *cell_edge_idx, const int *cell_edge_blk, + const float *geofac_div, const float *avg_coeff, + float *div_vec_c, const float *opt_in2, + float *opt_out2, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, + const int *i_endidx_in, int slev, int elev, + int nproma, int patch_id, bool l_limited_area, + bool l2fields, bool 
lacc, int nlev, int nblks_c, + int nblks_e); + +template void +div_avg<double>(const double *vec_e, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const int *cell_edge_idx, + const int *cell_edge_blk, const double *geofac_div, + const double *avg_coeff, double *div_vec_c, + const double *opt_in2, double *opt_out2, + const int *i_startblk_in, const int *i_endblk_in, + const int *i_startidx_in, const int *i_endidx_in, int slev, + int elev, int nproma, int patch_id, bool l_limited_area, + bool l2fields, bool lacc, int nlev, int nblks_c, int nblks_e); + template <typename T> void rot_vertex_atmos(const T *vec_e, const int *vert_edge_idx, - const int *vert_edge_blk, const T *geofac_rot, T &rot_vec, + const int *vert_edge_blk, const T *geofac_rot, T *rot_vec, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, bool lacc, int nlev, int nblks_e, int nblks_v) { @@ -1314,9 +1447,21 @@ void rot_vertex_atmos(const T *vec_e, const int *vert_edge_idx, } } +template void rot_vertex_atmos<float>( + const float *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, + const float *geofac_rot, float *rot_vec, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, + bool lacc, int nlev, int nblks_e, int nblks_v); + +template void rot_vertex_atmos<double>( + const double *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, + const double *geofac_rot, double *rot_vec, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, + bool lacc, int nlev, int nblks_e, int nblks_v); + template <typename T> void rot_vertex_ri(const T *vec_e, const int *vert_edge_idx, - const int *vert_edge_blk, const T *geofac_rot, T &rot_vec, + const int *vert_edge_blk, const T *geofac_rot, T *rot_vec, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, bool lacc, bool acc_async, int nlev, int nblks_e, int 
nblks_v) { @@ -1367,3 +1512,15 @@ void rot_vertex_ri(const T *vec_e, const int *vert_edge_idx, if (!acc_async) Kokkos::fence(); } + +template void rot_vertex_ri<float>( + const float *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, + const float *geofac_rot, float *rot_vec, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, + bool lacc, bool acc_async, int nlev, int nblks_e, int nblks_v); + +template void rot_vertex_ri<double>( + const double *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, + const double *geofac_rot, double *rot_vec, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, + bool lacc, bool acc_async, int nlev, int nblks_e, int nblks_v); -- GitLab From f20b5074dbe03f870a5d4ca93d9b8093663b0a42 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Sat, 1 Mar 2025 10:47:43 +0100 Subject: [PATCH 51/76] Separate linear, quadratic, and cubic tests --- test/c/test_horizontal_divrot.cpp | 65 ++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 19 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 082afa3..8bd782c 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -74,21 +74,9 @@ int At(int id, always_t<int, Dims>... ids) { return id + At_impl<Dims...>(FirstDim, ids...); } -// ValueType struct for compute precision and reconstruction method. 
-template <typename ValueType, int ReconMethod> struct DivrotType { - using type = ValueType; - static constexpr int get_recon_method() { return ReconMethod; }; -}; - -typedef ::testing::Types< - DivrotType<float, static_cast<int>(ReconstructionMethod::linear)>, - DivrotType<double, static_cast<int>(ReconstructionMethod::linear)>> - ValueTypes; - -template <typename ValueTypes> +template <typename ValueType, int ReconMethod> class HorizontalDivrotTest : public ::testing::Test { protected: - using ValueType = typename ValueTypes::type; // [lsq_dim_c, lsq_dim_unk] static constexpr std::tuple<int, int> init_lsq_dim(ReconstructionMethod method) { @@ -106,8 +94,8 @@ protected: static constexpr int nproma = 3; // inner loop length static constexpr int nlev = 1; // number of vertical levels static constexpr int nblks_c = 1; // number of cell blocks (for p_e_in) - static constexpr std::tuple<int, int> lsq_dim = init_lsq_dim( - static_cast<ReconstructionMethod>(ValueTypes::get_recon_method())); + static constexpr std::tuple<int, int> lsq_dim = + init_lsq_dim(static_cast<ReconstructionMethod>(ReconMethod)); static constexpr int lsq_dim_c = std::get<0>(lsq_dim); static constexpr int lsq_dim_unk = std::get<1>(lsq_dim); @@ -144,11 +132,32 @@ protected: } }; -TYPED_TEST_SUITE(HorizontalDivrotTest, ValueTypes); +template <typename ValueType> +class HorizontalDivrotLinearTest + : public HorizontalDivrotTest< + ValueType, static_cast<int>(ReconstructionMethod::linear)> {}; -TYPED_TEST(HorizontalDivrotTest, TestReconLsqCellLinear) { - using ValueType = typename TestFixture::ValueType; +template <typename ValueType> +class HorizontalDivrotQuadraticTest + : public HorizontalDivrotTest< + ValueType, static_cast<int>(ReconstructionMethod::quadratic)> {}; + +template <typename ValueType> +class HorizontalDivrotCubicTest + : public HorizontalDivrotTest<ValueType, static_cast<int>( + ReconstructionMethod::cubic)> { +}; + +typedef ::testing::Types<float, double> ValueTypes; + 
+TYPED_TEST_SUITE(HorizontalDivrotLinearTest, ValueTypes); + +TYPED_TEST(HorizontalDivrotLinearTest, TestLsqDimensions) { + EXPECT_EQ(TestFixture::lsq_dim_c, 3); + EXPECT_EQ(TestFixture::lsq_dim_unk, 2); +} +TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { constexpr int nproma = TestFixture::nproma; constexpr int nlev = TestFixture::nlev; constexpr int nblks_c = TestFixture::nblks_c; @@ -184,7 +193,7 @@ TYPED_TEST(HorizontalDivrotTest, TestReconLsqCellLinear) { this->lsq_moments[At<nproma, nblks_c, lsq_dim_unk>(i, 0, 1)] = 0.3; } - recon_lsq_cell_l<ValueType>( + recon_lsq_cell_l<TypeParam>( this->p_cc.data(), this->cell_neighbor_idx.data(), this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), @@ -197,3 +206,21 @@ TYPED_TEST(HorizontalDivrotTest, TestReconLsqCellLinear) { EXPECT_NEAR(this->p_coeff[1], 1.8, 1e-6); EXPECT_NEAR(this->p_coeff[2], 1.0, 1e-6); } + +typedef ::testing::Types<float, double> ValueTypes; + +TYPED_TEST_SUITE(HorizontalDivrotQuadraticTest, ValueTypes); + +TYPED_TEST(HorizontalDivrotQuadraticTest, TestLsqDimensions) { + EXPECT_EQ(TestFixture::lsq_dim_c, 9); + EXPECT_EQ(TestFixture::lsq_dim_unk, 5); +} + +typedef ::testing::Types<float, double> ValueTypes; + +TYPED_TEST_SUITE(HorizontalDivrotCubicTest, ValueTypes); + +TYPED_TEST(HorizontalDivrotCubicTest, TestLsqDimensions) { + EXPECT_EQ(TestFixture::lsq_dim_c, 9); + EXPECT_EQ(TestFixture::lsq_dim_unk, 9); +} -- GitLab From 7fd1f5dd78e35d84faee26310bc0ab1aeb853664 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Mon, 3 Mar 2025 09:48:25 +0100 Subject: [PATCH 52/76] Use snake case for at functions --- test/c/test_horizontal_divrot.cpp | 79 ++++++++++++++++--------------- 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 8bd782c..a99556f 100644 --- a/test/c/test_horizontal_divrot.cpp +++ 
b/test/c/test_horizontal_divrot.cpp @@ -37,41 +37,41 @@ enum class ReconstructionMethod { // Template function for LayoutLeft ID access in compile time // For example, a multi-dimensional array A of dimensions <2, 3, 4, 5> gets its // corresponding vector id (LayoutLeft) by -// At<2, 3, 4, 5>(id1, id2, id3, id4). -// The At_impl then adds the id from beginning to the end and pass the id prefix -// to the next recursive At_impl function. In this example, -// At<2, 3, 4, 5>(id1, id2, id3, id4) { -// return id1 + At_impl<3, 4, 5>(2, id2, id3, id4); +// at<2, 3, 4, 5>(id1, id2, id3, id4). +// The at_impl then adds the id from beginning to the end and pass the id prefix +// to the next recursive at_impl function. In this example, +// at<2, 3, 4, 5>(id1, id2, id3, id4) { +// return id1 + at_impl<3, 4, 5>(2, id2, id3, id4); // } -// At_impl<3, 4, 5>(2, id2, id3, id4) { -// return id2 * 2 + At_impl<4, 5>(2 * 3, id3, id4); +// at_impl<3, 4, 5>(2, id2, id3, id4) { +// return id2 * 2 + at_impl<4, 5>(2 * 3, id3, id4); // } -// At_impl<4, 5>(2 * 3, id3, id4) { -// return id3 * 2 * 3 + At_impl<5>(2 * 3 * 4, id4); +// at_impl<4, 5>(2 * 3, id3, id4) { +// return id3 * 2 * 3 + at_impl<5>(2 * 3 * 4, id4); // } -// At_impl<5>(2 * 3 * 4, id4) { +// at_impl<5>(2 * 3 * 4, id4) { // return id4 * 2 * 3 * 4; // } // Which gives -// At<2, 3, 4, 5>(id1, id2, id3, id4) = id1 + id2 * 2 + +// at<2, 3, 4, 5>(id1, id2, id3, id4) = id1 + id2 * 2 + // id3 * 2 * 3 + id4 * 2 * 3 * 4 // Helper type converting integer numbers to int template <class T, auto> using always_t = T; -// Base function of At_impl. Should not be used. -template <int... Dims> int At_impl(always_t<int, Dims>... ids) { return 0; } +// Base function of at_impl. Should not be used. +template <int... Dims> int at_impl(always_t<int, Dims>... 
ids) { return 0; } // Template specialization of the last ID -template <int LastDim> int At_impl(int prefix, int id) { return id * prefix; } -// Template specialization of At_impl, accumulate the return value using the -// first id and pass the prefix to the next recursive At_impl function. +template <int LastDim> int at_impl(int prefix, int id) { return id * prefix; } +// Template specialization of at_impl, accumulate the return value using the +// first id and pass the prefix to the next recursive at_impl function. template <int FirstDim, int... Dims> -int At_impl(int prefix, int id, always_t<int, Dims>... ids) { - return id * prefix + At_impl<Dims...>(prefix * FirstDim, ids...); +int at_impl(int prefix, int id, always_t<int, Dims>... ids) { + return id * prefix + at_impl<Dims...>(prefix * FirstDim, ids...); } -// At<dim1, dim2, ...>(id1, id2, ...) gets its memory index in vector assuming -// LayoutLeft. Use this function instead of At_impl. +// at<dim1, dim2, ...>(id1, id2, ...) gets its memory index in vector assuming +// LayoutLeft. Use this function instead of at_impl. template <int FirstDim, int... Dims> -int At(int id, always_t<int, Dims>... ids) { - return id + At_impl<Dims...>(FirstDim, ids...); +int at(int id, always_t<int, Dims>... 
ids) { + return id + at_impl<Dims...>(FirstDim, ids...); } template <typename ValueType, int ReconMethod> @@ -166,31 +166,31 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { // Initialization for (int i = 0; i < nproma; ++i) { - this->p_cc[At<nproma, nlev, nblks_c>(i, 0, 0)] = (i + 1); + this->p_cc[at<nproma, nlev, nblks_c>(i, 0, 0)] = (i + 1); - this->cell_neighbor_idx[At<nproma, nblks_c, 3>(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_idx[At<nproma, nblks_c, 3>(i, 0, 1)] = i; - this->cell_neighbor_idx[At<nproma, nblks_c, 3>(i, 0, 2)] = i; + this->cell_neighbor_idx[at<nproma, nblks_c, 3>(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_idx[at<nproma, nblks_c, 3>(i, 0, 1)] = i; + this->cell_neighbor_idx[at<nproma, nblks_c, 3>(i, 0, 2)] = i; for (int j = 0; j < 3; ++j) { - this->cell_neighbor_blk[At<nproma, nblks_c, 3>(i, 0, j)] = 0; + this->cell_neighbor_blk[at<nproma, nblks_c, 3>(i, 0, j)] = 0; } for (int j = 0; j < lsq_dim_c; ++j) { - this->lsq_qtmat_c[At<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>(i, 0, j, + this->lsq_qtmat_c[at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>(i, 0, j, 0)] = 1.0; - this->lsq_qtmat_c[At<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>(i, 1, j, + this->lsq_qtmat_c[at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>(i, 1, j, 0)] = 0.5; - this->p_coeff[At<lsq_dim_c, nproma, nlev, nblks_c>(j, i, 0, 0)] = 0.0; + this->p_coeff[at<lsq_dim_c, nproma, nlev, nblks_c>(j, i, 0, 0)] = 0.0; } - this->lsq_rmat_rdiag_c[At<nproma, lsq_dim_unk, nblks_c>(i, 0, 0)] = 2.0; - this->lsq_rmat_rdiag_c[At<nproma, lsq_dim_unk, nblks_c>(i, 1, 0)] = 2.0; + this->lsq_rmat_rdiag_c[at<nproma, lsq_dim_unk, nblks_c>(i, 0, 0)] = 2.0; + this->lsq_rmat_rdiag_c[at<nproma, lsq_dim_unk, nblks_c>(i, 1, 0)] = 2.0; this->lsq_rmat_utri_c - [At<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>( + [at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>( i, 0, 0)] = 0.1; - this->lsq_moments[At<nproma, nblks_c, lsq_dim_unk>(i, 0, 0)] = 0.2; - 
this->lsq_moments[At<nproma, nblks_c, lsq_dim_unk>(i, 0, 1)] = 0.3; + this->lsq_moments[at<nproma, nblks_c, lsq_dim_unk>(i, 0, 0)] = 0.2; + this->lsq_moments[at<nproma, nblks_c, lsq_dim_unk>(i, 0, 1)] = 0.3; } recon_lsq_cell_l<TypeParam>( @@ -202,9 +202,12 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); - EXPECT_NEAR(this->p_coeff[0], 0.34, 1e-6); - EXPECT_NEAR(this->p_coeff[1], 1.8, 1e-6); - EXPECT_NEAR(this->p_coeff[2], 1.0, 1e-6); + EXPECT_NEAR(this->p_coeff[(at<lsq_dim_c, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.34, 1e-6); + EXPECT_NEAR(this->p_coeff[(at<lsq_dim_c, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.8, 1e-6); + EXPECT_NEAR(this->p_coeff[(at<lsq_dim_c, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 1.0, 1e-6); } typedef ::testing::Types<float, double> ValueTypes; -- GitLab From 7c9ac0209a86bf2fd1f6725cf88fa65b45e66358 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Mon, 3 Mar 2025 13:24:52 +0100 Subject: [PATCH 53/76] Define template and instantiate functions --- src/horizontal/CMakeLists.txt | 4 - src/horizontal/lib_divrot.cpp | 197 +++-------------------------- src/horizontal/lib_divrot.hpp | 198 ++++++++++++++++-------------- test/c/test_horizontal_divrot.cpp | 3 +- 4 files changed, 124 insertions(+), 278 deletions(-) diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt index 75916bc..f3b75c0 100644 --- a/src/horizontal/CMakeLists.txt +++ b/src/horizontal/CMakeLists.txt @@ -59,10 +59,6 @@ target_include_directories( # multiple compile languages # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${PROJECT_SOURCE_DIR}/src>> -<<<<<<< HEAD -======= - $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_SOURCE_DIR}>> ->>>>>>> 670c30e (Add cpp implementations (untested)) PRIVATE # 
Path to config.h (for C and C++ only): Requires CMake 3.15+ for multiple # compile languages diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp index 9dce2e4..be6d9da 100644 --- a/src/horizontal/lib_divrot.cpp +++ b/src/horizontal/lib_divrot.cpp @@ -10,10 +10,11 @@ // --------------------------------------------------------------- #include <iostream> -#include <lib_divrot.hpp> -#include <support/mo_lib_loopindices.hpp> #include <vector> +#include <horizontal/lib_divrot.hpp> +#include <support/mo_lib_loopindices.hpp> + template <typename T> void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, const int *cell_neighbor_blk, const T *lsq_qtmat_c, @@ -101,24 +102,7 @@ void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, Kokkos::fence(); } -template void -recon_lsq_cell_l<float>(const float *p_cc, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, const float *lsq_qtmat_c, - const float *lsq_rmat_rdiag_c, - const float *lsq_rmat_utri_c, const float *lsq_moments, - float *p_coeff, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, - int nproma, bool l_consv, bool lacc, bool acc_async, - int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c); - -template void recon_lsq_cell_l<double>( - const double *p_cc, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, const double *lsq_qtmat_c, - const double *lsq_rmat_rdiag_c, const double *lsq_rmat_utri_c, - const double *lsq_moments, double *p_coeff, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, - bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, - int lsq_dim_unk, int lsq_dim_c); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_L); template <typename T> void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, @@ -195,21 +179,7 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, Kokkos::fence(); 
} -template void recon_lsq_cell_l_svd<float>( - const float *p_cc, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, const float *lsq_pseudoinv, - const float *lsq_moments, float *p_coeff, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, - bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, - int lsq_dim_unk, int lsq_dim_c); - -template void recon_lsq_cell_l_svd<double>( - const double *p_cc, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, const double *lsq_pseudoinv, - const double *lsq_moments, double *p_coeff, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, - bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, - int lsq_dim_unk, int lsq_dim_c); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD); template <typename T> void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, @@ -373,23 +343,7 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::fence(); } -template void recon_lsq_cell_q<float>( - const float *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const float *lsq_rmat_rdiag_c, const float *lsq_rmat_utri_c, - const float *lsq_moments, const float *lsq_qtmat_c, float *p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, - int elev, int nproma, int patch_id, int lsq_high_set_dim_c, - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); - -template void recon_lsq_cell_q<double>( - const double *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const double *lsq_rmat_rdiag_c, const double *lsq_rmat_utri_c, - const double *lsq_moments, const double *lsq_qtmat_c, double *p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, - int elev, int nproma, int patch_id, int lsq_high_set_dim_c, - bool l_limited_area, bool 
lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_Q); template <typename T> void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, @@ -529,21 +483,7 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, Kokkos::fence(); } -template void recon_lsq_cell_q_svd<float>( - const float *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const float *lsq_pseudoinv, const float *lsq_moments, float *p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, - int elev, int nproma, int patch_id, int lsq_high_set_dim_c, - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); - -template void recon_lsq_cell_q_svd<double>( - const double *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const double *lsq_pseudoinv, const double *lsq_moments, double *p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, - int elev, int nproma, int patch_id, int lsq_high_set_dim_c, - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD); template <typename T> void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, @@ -785,23 +725,7 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::fence(); } -template void recon_lsq_cell_c<float>( - const float *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const float *lsq_rmat_rdiag_c, const float *lsq_rmat_utri_c, - const float *lsq_moments, const float *lsq_qtmat_c, float *p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, - int elev, int nproma, int patch_id, int lsq_high_set_dim_c, - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); - -template void recon_lsq_cell_c<double>( - const double 
*p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const double *lsq_rmat_rdiag_c, const double *lsq_rmat_utri_c, - const double *lsq_moments, const double *lsq_qtmat_c, double *p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, - int elev, int nproma, int patch_id, int lsq_high_set_dim_c, - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_C); template <typename T> void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, @@ -988,21 +912,7 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, Kokkos::fence(); } -template void recon_lsq_cell_c_svd<float>( - const float *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const float *lsq_pseudoinv, const float *lsq_moments, float *p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, - int elev, int nproma, int patch_id, int lsq_high_set_dim_c, - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); - -template void recon_lsq_cell_c_svd<double>( - const double *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const double *lsq_pseudoinv, const double *lsq_moments, double *p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, - int elev, int nproma, int patch_id, int lsq_high_set_dim_c, - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD); template <typename T> void div3d(const T *vec_e, const int *cell_edge_idx, const int *cell_edge_blk, @@ -1046,19 +956,7 @@ void div3d(const T *vec_e, const int *cell_edge_idx, const int *cell_edge_blk, } } -template void div3d<float>(const float *vec_e, const int *cell_edge_idx, - const int *cell_edge_blk, const float *geofac_div, - float *div_vec_c, int i_startblk, int i_endblk, - int 
i_startidx_in, int i_endidx_in, int slev, - int elev, int nproma, bool lacc, int nlev, - int nblks_c, int nblks_e); - -template void div3d<double>(const double *vec_e, const int *cell_edge_idx, - const int *cell_edge_blk, const double *geofac_div, - double *div_vec_c, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, - int elev, int nproma, bool lacc, int nlev, - int nblks_c, int nblks_e); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV3D); template <typename T> void div3d_2field(const T *vec_e, const int *cell_edge_idx, @@ -1115,19 +1013,7 @@ void div3d_2field(const T *vec_e, const int *cell_edge_idx, } } -template void div3d_2field<float>( - const float *vec_e, const int *cell_edge_idx, const int *cell_edge_blk, - const float *geofac_div, float *div_vec_c, const float *in2, float *out2, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, - int elev, int nproma, bool lacc, int nlev, int nblks_c, int nblks_e); - -template void -div3d_2field<double>(const double *vec_e, const int *cell_edge_idx, - const int *cell_edge_blk, const double *geofac_div, - double *div_vec_c, const double *in2, double *out2, - int i_startblk, int i_endblk, int i_startidx_in, - int i_endidx_in, int slev, int elev, int nproma, bool lacc, - int nlev, int nblks_c, int nblks_e); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV3D_2FIELD); template <typename T> void div4d(const int *cell_edge_idx, const int *cell_edge_blk, @@ -1180,19 +1066,7 @@ void div4d(const int *cell_edge_idx, const int *cell_edge_blk, } } -template void div4d<float>(const int *cell_edge_idx, const int *cell_edge_blk, - const float *geofac_div, const float *f4din, - float *f4dout, int dim4d, int i_startblk, - int i_endblk, int i_startidx_in, int i_endidx_in, - const int *slev, const int *elev, int nproma, - bool lacc, int nlev, int nblks_c, int nblks_e); - -template void div4d<double>(const int *cell_edge_idx, const int *cell_edge_blk, - const 
double *geofac_div, const double *f4din, - double *f4dout, int dim4d, int i_startblk, - int i_endblk, int i_startidx_in, int i_endidx_in, - const int *slev, const int *elev, int nproma, - bool lacc, int nlev, int nblks_c, int nblks_e); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV4D); template <typename T> void div_avg(const T *vec_e, const int *cell_neighbor_idx, @@ -1373,28 +1247,7 @@ void div_avg(const T *vec_e, const int *cell_neighbor_idx, } } -template void div_avg<float>(const float *vec_e, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, - const int *cell_edge_idx, const int *cell_edge_blk, - const float *geofac_div, const float *avg_coeff, - float *div_vec_c, const float *opt_in2, - float *opt_out2, const int *i_startblk_in, - const int *i_endblk_in, const int *i_startidx_in, - const int *i_endidx_in, int slev, int elev, - int nproma, int patch_id, bool l_limited_area, - bool l2fields, bool lacc, int nlev, int nblks_c, - int nblks_e); - -template void -div_avg<double>(const double *vec_e, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, const int *cell_edge_idx, - const int *cell_edge_blk, const double *geofac_div, - const double *avg_coeff, double *div_vec_c, - const double *opt_in2, double *opt_out2, - const int *i_startblk_in, const int *i_endblk_in, - const int *i_startidx_in, const int *i_endidx_in, int slev, - int elev, int nproma, int patch_id, bool l_limited_area, - bool l2fields, bool lacc, int nlev, int nblks_c, int nblks_e); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV_AVG); template <typename T> void rot_vertex_atmos(const T *vec_e, const int *vert_edge_idx, @@ -1447,17 +1300,7 @@ void rot_vertex_atmos(const T *vec_e, const int *vert_edge_idx, } } -template void rot_vertex_atmos<float>( - const float *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, - const float *geofac_rot, float *rot_vec, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int 
elev, int nproma, - bool lacc, int nlev, int nblks_e, int nblks_v); - -template void rot_vertex_atmos<double>( - const double *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, - const double *geofac_rot, double *rot_vec, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, - bool lacc, int nlev, int nblks_e, int nblks_v); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_ROT_VERTEX_ATMOS); template <typename T> void rot_vertex_ri(const T *vec_e, const int *vert_edge_idx, @@ -1513,14 +1356,4 @@ void rot_vertex_ri(const T *vec_e, const int *vert_edge_idx, Kokkos::fence(); } -template void rot_vertex_ri<float>( - const float *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, - const float *geofac_rot, float *rot_vec, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, - bool lacc, bool acc_async, int nlev, int nblks_e, int nblks_v); - -template void rot_vertex_ri<double>( - const double *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, - const double *geofac_rot, double *rot_vec, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, - bool lacc, bool acc_async, int nlev, int nblks_e, int nblks_v); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_ROT_VERTEX_RI); diff --git a/src/horizontal/lib_divrot.hpp b/src/horizontal/lib_divrot.hpp index db60b29..a0cc8cf 100644 --- a/src/horizontal/lib_divrot.hpp +++ b/src/horizontal/lib_divrot.hpp @@ -12,106 +12,122 @@ #pragma once #include <Kokkos_Core.hpp> +#include <types.hpp> -template <typename T> -void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, const T *lsq_qtmat_c, - const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, T *p_coeff, int i_startblk, - int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, bool l_consv, bool lacc, - bool 
acc_async, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); +#define ICONMATH_DECLARE_RECON_LSQ_CELL_L(_type) \ + void recon_lsq_cell_l( \ + const _type *p_cc, const int *cell_neighbor_idx, \ + const int *cell_neighbor_blk, const _type *lsq_qtmat_c, \ + const _type *lsq_rmat_rdiag_c, const _type *lsq_rmat_utri_c, \ + const _type *lsq_moments, _type *p_coeff, int i_startblk, int i_endblk, \ + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, \ + bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, \ + int lsq_dim_unk, int lsq_dim_c) -template <typename T> -void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, const T *lsq_pseudoinv, - const T *lsq_moments, T &p_coeff, int i_startblk, - int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, bool l_consv, - bool lacc, bool acc_async, int nblks_c, int nlev, - int lsq_dim_unk, int lsq_dim_c); +#define ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD(_type) \ + void recon_lsq_cell_l_svd( \ + const _type *p_cc, const int *cell_neighbor_idx, \ + const int *cell_neighbor_blk, const _type *lsq_pseudoinv, \ + const _type *lsq_moments, _type *p_coeff, int i_startblk, int i_endblk, \ + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, \ + bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, \ + int lsq_dim_unk, int lsq_dim_c) -template <typename T> -void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, const T *lsq_qtmat_c, T &p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, - int i_endidx_in, int slev, int elev, int nproma, - int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); +#define ICONMATH_DECLARE_RECON_LSQ_CELL_Q(_type) \ + void recon_lsq_cell_q( \ + const _type *p_cc, const int *lsq_idx_c, const 
int *lsq_blk_c, \ + const _type *lsq_rmat_rdiag_c, const _type *lsq_rmat_utri_c, \ + const _type *lsq_moments, const _type *lsq_qtmat_c, _type *p_coeff, \ + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ + int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, \ + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, \ + int lsq_dim_c) -template <typename T> -void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, - const int *lsq_blk_c, const T *lsq_pseudoinv, - const T *lsq_moments, T &p_coeff, int i_startblk, - int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int patch_id, - int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); +#define ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD(_type) \ + void recon_lsq_cell_q_svd( \ + const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ + const _type *lsq_pseudoinv, const _type *lsq_moments, _type *p_coeff, \ + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ + int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, \ + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, \ + int lsq_dim_c) -template <typename T> -void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, const T *lsq_qtmat_c, T &p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, - int i_endidx_in, int slev, int elev, int nproma, - int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); +#define ICONMATH_DECLARE_RECON_LSQ_CELL_C(_type) \ + void recon_lsq_cell_c( \ + const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ + const _type *lsq_rmat_rdiag_c, const _type *lsq_rmat_utri_c, \ + const _type *lsq_moments, const _type *lsq_qtmat_c, _type *p_coeff, \ + 
int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ + int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, \ + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, \ + int lsq_dim_c) -template <typename T> -void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, - const int *lsq_blk_c, const T *lsq_pseudoinv, - const T *lsq_moments, T &p_coeff, int i_startblk, - int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int patch_id, - int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); +#define ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD(_type) \ + void recon_lsq_cell_c_svd( \ + const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ + const _type *lsq_pseudoinv, const _type *lsq_moments, _type *p_coeff, \ + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ + int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, \ + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, \ + int lsq_dim_c) -template <typename T> -void div3d(const T *vec_e, const int *cell_edge_idx, const int *cell_edge_blk, - const T *geofac_div, T &div_vec_c, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, - bool lacc, int nlev, int nblks_c, int nblks_e); +#define ICONMATH_DECLARE_DIV3D(_type) \ + void div3d(const _type *vec_e, const int *cell_edge_idx, \ + const int *cell_edge_blk, const _type *geofac_div, \ + _type *div_vec_c, int i_startblk, int i_endblk, \ + int i_startidx_in, int i_endidx_in, int slev, int elev, \ + int nproma, bool lacc, int nlev, int nblks_c, int nblks_e) -template <typename T> -void div3d_2field(const T *vec_e, const int *cell_edge_idx, - const int *cell_edge_blk, const T *geofac_div, T &div_vec_c, - const T *in2, T &out2, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, - int nproma, 
bool lacc, int nlev, int nblks_c, int nblks_e); +#define ICONMATH_DECLARE_DIV3D_2FIELD(_type) \ + void div3d_2field(const _type *vec_e, const int *cell_edge_idx, \ + const int *cell_edge_blk, const _type *geofac_div, \ + _type *div_vec_c, const _type *in2, _type *out2, \ + int i_startblk, int i_endblk, int i_startidx_in, \ + int i_endidx_in, int slev, int elev, int nproma, \ + bool lacc, int nlev, int nblks_c, int nblks_e) -template <typename T> -void div4d(const int *cell_edge_idx, const int *cell_edge_blk, - const T *geofac_div, const T *f4din, T &f4dout, int dim4d, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - const int *slev, const int *elev, int nproma, bool lacc, int nlev, - int nblks_c, int nblks_e); +#define ICONMATH_DECLARE_DIV4D(_type) \ + void div4d(const int *cell_edge_idx, const int *cell_edge_blk, \ + const _type *geofac_div, const _type *f4din, _type *f4dout, \ + int dim4d, int i_startblk, int i_endblk, int i_startidx_in, \ + int i_endidx_in, const int *slev, const int *elev, int nproma, \ + bool lacc, int nlev, int nblks_c, int nblks_e) -template <typename T> -void div_avg(const T *vec_e, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, const int *cell_edge_idx, - const int *cell_edge_blk, const T *geofac_div, const T *avg_coeff, - T &div_vec_c, const T *opt_in2, T &opt_out2, - const int *i_startblk_in, const int *i_endblk_in, - const int *i_startidx_in, const int *i_endidx_in, int slev, - int elev, int nproma, int patch_id, bool l_limited_area, - bool l2fields, bool lacc, int nlev, int nblks_c, int nblks_e); +#define ICONMATH_DECLARE_DIV_AVG(_type) \ + void div_avg(const _type *vec_e, const int *cell_neighbor_idx, \ + const int *cell_neighbor_blk, const int *cell_edge_idx, \ + const int *cell_edge_blk, const _type *geofac_div, \ + const _type *avg_coeff, _type *div_vec_c, const _type *opt_in2, \ + _type *opt_out2, const int *i_startblk_in, \ + const int *i_endblk_in, const int *i_startidx_in, \ + const int 
*i_endidx_in, int slev, int elev, int nproma, \ + int patch_id, bool l_limited_area, bool l2fields, bool lacc, \ + int nlev, int nblks_c, int nblks_e) -template <typename T> -void rot_vertex_atmos(const T *vec_e, const int *vert_edge_idx, - const int *vert_edge_blk, const T *geofac_rot, T &rot_vec, - int i_startblk, int i_endblk, int i_startidx_in, - int i_endidx_in, int slev, int elev, int nproma, - bool lacc, int nlev, int nblks_e, int nblks_v); +#define ICONMATH_DECLARE_ROT_VERTEX_ATMOS(_type) \ + void rot_vertex_atmos( \ + const _type *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, \ + const _type *geofac_rot, _type *rot_vec, int i_startblk, int i_endblk, \ + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, \ + bool lacc, int nlev, int nblks_e, int nblks_v) -template <typename T> -void rot_vertex_ri(const T *vec_e, const int *vert_edge_idx, - const int *vert_edge_blk, const T *geofac_rot, T &rot_vec, - int i_startblk, int i_endblk, int i_startidx_in, - int i_endidx_in, int slev, int elev, int nproma, bool lacc, - bool acc_async, int nlev, int nblks_e, int nblks_v); +#define ICONMATH_DECLARE_ROT_VERTEX_RI(_type) \ + void rot_vertex_ri( \ + const _type *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, \ + const _type *geofac_rot, _type *rot_vec, int i_startblk, int i_endblk, \ + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, \ + bool lacc, bool acc_async, int nlev, int nblks_e, int nblks_v) + +// Declare as templates +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_L(T); +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD(T); +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_Q(T); +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD(T); +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_C(T); +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD(T); +template <typename T> ICONMATH_DECLARE_DIV3D(T); +template <typename T> ICONMATH_DECLARE_DIV3D_2FIELD(T); +template 
<typename T> ICONMATH_DECLARE_DIV4D(T); +template <typename T> ICONMATH_DECLARE_DIV_AVG(T); +template <typename T> ICONMATH_DECLARE_ROT_VERTEX_ATMOS(T); +template <typename T> ICONMATH_DECLARE_ROT_VERTEX_RI(T); diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index a99556f..29693ff 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -9,10 +9,11 @@ // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- +#include <vector> + #include <Kokkos_Core.hpp> #include <gtest/gtest.h> #include <horizontal/lib_divrot.hpp> -#include <vector> // Template helpers for combining multiple dimension array sizes. // The base function of dimension combine. Should not be used. -- GitLab From d6427bc5ee69da34e4b4b03d8f3f71bf0aa6ac36 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Sat, 8 Mar 2025 20:53:35 +0100 Subject: [PATCH 54/76] Add comments and adapt Doxygen style --- test/c/test_horizontal_divrot.cpp | 75 ++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 21 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 29693ff..3a46b9e 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -15,27 +15,43 @@ #include <gtest/gtest.h> #include <horizontal/lib_divrot.hpp> -// Template helpers for combining multiple dimension array sizes. -// The base function of dimension combine. Should not be used. +// Template function for computing array size. +// For example, we get the array size of a 4-dimensional array A(2, 3, 4, 5) by +// dim_combine(2, 3, 4, 5). +// Which will automatically instantiate +// dim_combine<int, int, int, int>(2, 3, 4, 5). 
+// The function then call dim_combine recursively +// dim_combine<int, int, int, int>(2, 3, 4, 5) { +// return static_cast<size_t>(2) * dim_combine<int, int, int>(3, 4, 5); +// } +// dim_combine<int, int, int>(3, 4, 5) { +// return static_cast<size_t>(3) * dim_combine<int, int>(4, 5); +// } +// dim_combine<int, int>(4, 5) { +// return static_cast<size_t>(4) * dim_combine<int>(5); +// } +// Where the last dim_combine is specialized as +// dim_combine<int>(5) { +// return static_cast<size_t>(5); +// } +// Which gives +// dim_combine<int, int, int, int>(2, 3, 4, 5) = +// static_cast<size_t>(2) * static_cast<size_t>(3) * +// static_cast<size_t>(4) * static_cast<size_t>(5) +/// Template helpers for combining multiple dimension array sizes. +/// The base function of dimension combine. Should not be used. template <typename... Ts> size_t dim_combine(Ts... dims) { return 0; } -// Template specialization of only one dimension, returns the dimension itself. +/// Template specialization of only one dimension, returns the dimension itself. template <typename T> size_t dim_combine(T dim) { return static_cast<size_t>(dim); } -// Template specialization of picking out the first dimension. The combined -// dimension is the first dimension times the combined dimension of the rest. +/// Template specialization of picking out the first dimension. The combined +/// dimension is the first dimension times the combined dimension of the rest. template <typename T, typename... Ts> size_t dim_combine(T dim, Ts... dims) { return static_cast<size_t>(dim) * dim_combine(dims...); } -// Enum class for the reconstruction method -enum class ReconstructionMethod { - linear, - quadratic, - cubic, -}; - -// Template function for LayoutLeft ID access in compile time +// Template function for LayoutLeft ID access in compile time. // For example, a multi-dimensional array A of dimensions <2, 3, 4, 5> gets its // corresponding vector id (LayoutLeft) by // at<2, 3, 4, 5>(id1, id2, id3, id4). 
@@ -56,29 +72,38 @@ enum class ReconstructionMethod { // Which gives // at<2, 3, 4, 5>(id1, id2, id3, id4) = id1 + id2 * 2 + // id3 * 2 * 3 + id4 * 2 * 3 * 4 -// Helper type converting integer numbers to int +/// Helper type converting integer numbers to int template <class T, auto> using always_t = T; -// Base function of at_impl. Should not be used. +/// Base function of at_impl. Should not be used. template <int... Dims> int at_impl(always_t<int, Dims>... ids) { return 0; } -// Template specialization of the last ID +/// Template specialization of the last ID template <int LastDim> int at_impl(int prefix, int id) { return id * prefix; } -// Template specialization of at_impl, accumulate the return value using the -// first id and pass the prefix to the next recursive at_impl function. +/// Template specialization of at_impl, accumulate the return value using the +/// first id and pass the prefix to the next recursive at_impl function. template <int FirstDim, int... Dims> int at_impl(int prefix, int id, always_t<int, Dims>... ids) { return id * prefix + at_impl<Dims...>(prefix * FirstDim, ids...); } -// at<dim1, dim2, ...>(id1, id2, ...) gets its memory index in vector assuming -// LayoutLeft. Use this function instead of at_impl. +/// at<dim1, dim2, ...>(id1, id2, ...) gets its memory index in vector assuming +/// LayoutLeft. Use this function instead of at_impl. template <int FirstDim, int... Dims> int at(int id, always_t<int, Dims>... ids) { return id + at_impl<Dims...>(FirstDim, ids...); } +/// Enum class for the reconstruction method +enum class ReconstructionMethod { + linear, + quadratic, + cubic, +}; + +/// Base test class for the horizontal divrot tests. Templated for the ValueType +/// and ReconMethod for the reconstruction method. template <typename ValueType, int ReconMethod> class HorizontalDivrotTest : public ::testing::Test { protected: - // [lsq_dim_c, lsq_dim_unk] + // lsq_dim_c and lsq_dim_unk are instantiated in compile time. 
static constexpr std::tuple<int, int> init_lsq_dim(ReconstructionMethod method) { switch (method) { @@ -133,16 +158,22 @@ protected: } }; +/// Test class for the horizontal tests. The reconstruction method is specified +/// to linear. template <typename ValueType> class HorizontalDivrotLinearTest : public HorizontalDivrotTest< ValueType, static_cast<int>(ReconstructionMethod::linear)> {}; +/// Test class for the horizontal tests. The reconstruction method is specified +/// to quadratic. template <typename ValueType> class HorizontalDivrotQuadraticTest : public HorizontalDivrotTest< ValueType, static_cast<int>(ReconstructionMethod::quadratic)> {}; +/// Test class for the horizontal tests. The reconstruction method is specified +/// to cubic. template <typename ValueType> class HorizontalDivrotCubicTest : public HorizontalDivrotTest<ValueType, static_cast<int>( @@ -194,6 +225,7 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { this->lsq_moments[at<nproma, nblks_c, lsq_dim_unk>(i, 0, 1)] = 0.3; } + // Test function recon_lsq_cell_l<TypeParam>( this->p_cc.data(), this->cell_neighbor_idx.data(), this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), @@ -203,6 +235,7 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + // Check result EXPECT_NEAR(this->p_coeff[(at<lsq_dim_c, nproma, nlev, nblks_c>(0, 0, 0, 0))], 0.34, 1e-6); EXPECT_NEAR(this->p_coeff[(at<lsq_dim_c, nproma, nlev, nblks_c>(1, 0, 0, 0))], -- GitLab From 850a3bf67c02adad95f95093b5701df6c770adcf Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Sat, 8 Mar 2025 22:37:00 +0100 Subject: [PATCH 55/76] Add first random test --- test/c/test_horizontal_divrot.cpp | 158 ++++++++++++++++++++++++++---- 1 file changed, 138 insertions(+), 20 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp 
b/test/c/test_horizontal_divrot.cpp index 3a46b9e..a57f31a 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -9,11 +9,14 @@ // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- +#include <iostream> +#include <random> #include <vector> #include <Kokkos_Core.hpp> #include <gtest/gtest.h> #include <horizontal/lib_divrot.hpp> +#include <support/mo_lib_loopindices.hpp> // Template function for computing array size. // For example, we get the array size of a 4-dimensional array A(2, 3, 4, 5) by @@ -180,6 +183,7 @@ class HorizontalDivrotCubicTest ReconstructionMethod::cubic)> { }; +/// ValueTypes which the divrot tests should run with typedef ::testing::Types<float, double> ValueTypes; TYPED_TEST_SUITE(HorizontalDivrotLinearTest, ValueTypes); @@ -196,33 +200,38 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { constexpr int lsq_dim_c = TestFixture::lsq_dim_c; constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_c, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + // Initialization for (int i = 0; i < nproma; ++i) { - this->p_cc[at<nproma, nlev, nblks_c>(i, 0, 0)] = (i + 1); + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); - this->cell_neighbor_idx[at<nproma, nblks_c, 3>(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_idx[at<nproma, nblks_c, 3>(i, 0, 1)] = i; - this->cell_neighbor_idx[at<nproma, nblks_c, 3>(i, 0, 2)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; + 
this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; for (int j = 0; j < 3; ++j) { - this->cell_neighbor_blk[at<nproma, nblks_c, 3>(i, 0, j)] = 0; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; } for (int j = 0; j < lsq_dim_c; ++j) { - this->lsq_qtmat_c[at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>(i, 0, j, - 0)] = 1.0; - this->lsq_qtmat_c[at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>(i, 1, j, - 0)] = 0.5; - this->p_coeff[at<lsq_dim_c, nproma, nlev, nblks_c>(j, i, 0, 0)] = 0.0; + this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; + this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.5; + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; } - this->lsq_rmat_rdiag_c[at<nproma, lsq_dim_unk, nblks_c>(i, 0, 0)] = 2.0; - this->lsq_rmat_rdiag_c[at<nproma, lsq_dim_unk, nblks_c>(i, 1, 0)] = 2.0; - this->lsq_rmat_utri_c - [at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>( - i, 0, 0)] = 0.1; + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = 2.0; + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = 2.0; + this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = 0.1; - this->lsq_moments[at<nproma, nblks_c, lsq_dim_unk>(i, 0, 0)] = 0.2; - this->lsq_moments[at<nproma, nblks_c, lsq_dim_unk>(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; } // Test function @@ -244,7 +253,118 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { 1.0, 1e-6); } -typedef ::testing::Types<float, double> ValueTypes; +TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearRandom) { + constexpr int nproma = TestFixture::nproma; + constexpr int nlev = TestFixture::nlev; + constexpr int nblks_c = TestFixture::nblks_c; + constexpr int lsq_dim_c = TestFixture::lsq_dim_c; + constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at 
= at<lsq_dim_c, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < 3; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = real_distrib(gen); + this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = real_distrib(gen); + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = real_distrib(gen); + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = real_distrib(gen); + this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = real_distrib(gen); + + this->lsq_moments[moments_at(i, 0, 0)] = real_distrib(gen); + this->lsq_moments[moments_at(i, 0, 1)] = real_distrib(gen); + } + + // Test function + recon_lsq_cell_l<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(3); + std::vector<TypeParam> z_qt_times_d(2); + std::vector<TypeParam> p_result(lsq_dim_c * nproma); + for (int jb = 
this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + z_d[0] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + z_d[1] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + z_d[2] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + z_qt_times_d[0] = this->lsq_qtmat_c[qtmat_at(jc, 0, 0, jb)] * z_d[0] + + this->lsq_qtmat_c[qtmat_at(jc, 0, 1, jb)] * z_d[1] + + this->lsq_qtmat_c[qtmat_at(jc, 0, 2, jb)] * z_d[2]; + z_qt_times_d[1] = this->lsq_qtmat_c[qtmat_at(jc, 1, 0, jb)] * z_d[0] + + this->lsq_qtmat_c[qtmat_at(jc, 1, 1, jb)] * z_d[1] + + this->lsq_qtmat_c[qtmat_at(jc, 1, 2, jb)] * z_d[2]; + p_result[at<lsq_dim_c, nproma>(2, jc)] = + this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 1, jb)] * z_qt_times_d[1]; + p_result[at<lsq_dim_c, nproma>(1, jc)] = + this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 0, jb)] * + (z_qt_times_d[0] - this->lsq_rmat_utri_c[rmat_utri_at(jc, 0, jb)] * + p_result[at<lsq_dim_c, nproma>(2, jc)]); + p_result[at<lsq_dim_c, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)]; + } + } + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + p_result[at<lsq_dim_c, nproma>(0, jc)] = + p_result[at<lsq_dim_c, nproma>(0, jc)] - + p_result[at<lsq_dim_c, nproma>(1, jc)] * + this->lsq_moments[moments_at(jc, jb, 0)] - + p_result[at<lsq_dim_c, nproma>(2, jc)] * + this->lsq_moments[moments_at(jc, jb, 
1)]; + } + } + } + + // Check result + for (int i = 0; i < lsq_dim_c; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_c, nproma>(i, jc))], 1e-6); + } + } +} TYPED_TEST_SUITE(HorizontalDivrotQuadraticTest, ValueTypes); @@ -253,8 +373,6 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, TestLsqDimensions) { EXPECT_EQ(TestFixture::lsq_dim_unk, 5); } -typedef ::testing::Types<float, double> ValueTypes; - TYPED_TEST_SUITE(HorizontalDivrotCubicTest, ValueTypes); TYPED_TEST(HorizontalDivrotCubicTest, TestLsqDimensions) { -- GitLab From 636061bb714f87b4531c98c9e28e80b0d563e7ab Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Sun, 9 Mar 2025 11:06:02 +0100 Subject: [PATCH 56/76] Fix bug and add tests --- src/horizontal/lib_divrot.cpp | 88 ++++----- src/horizontal/lib_divrot.hpp | 23 +-- test/c/test_horizontal_divrot.cpp | 314 ++++++++++++++++++++++++------ 3 files changed, 306 insertions(+), 119 deletions(-) diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp index be6d9da..164be38 100644 --- a/src/horizontal/lib_divrot.cpp +++ b/src/horizontal/lib_divrot.cpp @@ -36,14 +36,14 @@ void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - Kokkos::View<T *> z_d("z_d", 3); - Kokkos::View<T *> z_qt_times_d("z_qt_times_d", 2); + Kokkos::View<T *> z_d("z_d", lsq_dim_c); + Kokkos::View<T *> z_qt_times_d("z_qt_times_d", lsq_dim_unk); - UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, 3); - UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, 3); + UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, lsq_dim_c); + UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, lsq_dim_c); UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); - UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, 
nblks_c); UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); @@ -124,13 +124,13 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - Kokkos::View<T *> z_b("z_b", 3); + Kokkos::View<T *> z_b("z_b", lsq_dim_c); - UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, 3); - UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, 3); + UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, lsq_dim_c); + UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, lsq_dim_c); UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); - UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); @@ -183,13 +183,12 @@ ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD); template <typename T> void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, - int i_endidx_in, int slev, int elev, int nproma, - int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c) { + const T *lsq_qtmat_c, const T *lsq_rmat_rdiag_c, + const T *lsq_rmat_utri_c, const T *lsq_moments, + T *p_coeff, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, + int nproma, int patch_id, bool l_limited_area, bool lacc, + int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) { // Wrap raw pointers in unmanaged Kokkos Views. 
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; @@ -202,14 +201,14 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - Kokkos::View<T ***> z_d("z_d", lsq_high_set_dim_c, nproma, elev); - Kokkos::View<T *> z_qt_times_d("z_qt_times_d", 5); + Kokkos::View<T ***> z_d("z_d", lsq_dim_c, nproma, nlev); + Kokkos::View<T *> z_qt_times_d("z_qt_times_d", lsq_dim_unk); UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); - UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); @@ -219,9 +218,10 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, nblks_c); UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); - if (patch_id > 1 || l_limited_area) { + if (patch_id > 0 || l_limited_area) { Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( - {0, i_startidx_in, slev, i_startblk}, {6, i_endidx_in, elev, i_endblk}); + {0, i_startidx_in, slev, i_startblk}, + {lsq_dim_unk + 1, i_endidx_in, elev, i_endblk}); Kokkos::parallel_for( "recon_lsq_cell_q_init", initPolicy, KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) { @@ -351,9 +351,8 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, const T *lsq_moments, T *p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, - int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c) { + bool l_limited_area, bool lacc, int nblks_c, int nlev, + int lsq_dim_unk, int lsq_dim_c) { // Wrap raw pointers 
in unmanaged Kokkos Views. typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; @@ -366,21 +365,22 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - Kokkos::View<T ***> z_b("z_b", lsq_high_set_dim_c, nproma, elev); + Kokkos::View<T ***> z_b("z_b", lsq_dim_c, nproma, elev); UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); - UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); - if (patch_id > 1 || l_limited_area) { + if (patch_id > 0 || l_limited_area) { Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( - {0, i_startidx_in, slev, i_startblk}, {6, i_endidx_in, elev, i_endblk}); + {0, i_startidx_in, slev, i_startblk}, + {lsq_dim_unk + 1, i_endidx_in, elev, i_endblk}); Kokkos::parallel_for( "recon_lsq_cell_q_svd_init", initPolicy, KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) { @@ -487,13 +487,12 @@ ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD); template <typename T> void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, - int i_endidx_in, int slev, int elev, int nproma, - int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c) { + const T *lsq_qtmat_c, const T *lsq_rmat_rdiag_c, + const T *lsq_rmat_utri_c, const T *lsq_moments, + T *p_coeff, int 
i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, + int nproma, int patch_id, bool l_limited_area, bool lacc, + int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) { // Wrap raw pointers in unmanaged Kokkos Views. typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; @@ -506,14 +505,14 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - Kokkos::View<T ***> z_d("z_d", lsq_high_set_dim_c, nproma, elev); + Kokkos::View<T ***> z_d("z_d", lsq_dim_c, nproma, elev); Kokkos::View<T *> z_qt_times_d("z_qt_times_d", 9); UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); - UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); @@ -523,9 +522,10 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, nblks_c); UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); - if (patch_id > 1 || l_limited_area) { + if (patch_id > 0 || l_limited_area) { Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( - {0, i_startidx_in, slev, i_startblk}, {9, i_endidx_in, elev, i_endblk}); + {0, i_startidx_in, slev, i_startblk}, + {lsq_dim_unk + 1, i_endidx_in, elev, i_endblk}); Kokkos::parallel_for( "recon_lsq_cell_c_init", initPolicy, KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) { @@ -754,20 +754,20 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); - UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, 
nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); - if (patch_id > 1 || l_limited_area) { + if (patch_id > 0 || l_limited_area) { for (int jb = i_startblk; jb < i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); - Kokkos::MDRangePolicy<Kokkos::Rank<3>> initPolicy({slev, i_startidx, 0}, - {elev, i_endidx, 9}); + Kokkos::MDRangePolicy<Kokkos::Rank<3>> initPolicy( + {slev, i_startidx, 0}, {elev, i_endidx, lsq_dim_unk + 1}); Kokkos::parallel_for( "recon_lsq_cell_c_svd_init", initPolicy, KOKKOS_LAMBDA(const int jk, const int jc, const int ji) { @@ -1156,7 +1156,7 @@ void div_avg(const T *vec_e, const int *cell_neighbor_idx, } } - if (patch_id > 1 || l_limited_area) { + if (patch_id > 0 || l_limited_area) { i_startblk = i_startblk_in[1]; i_endblk = i_endblk_in[1]; diff --git a/src/horizontal/lib_divrot.hpp b/src/horizontal/lib_divrot.hpp index a0cc8cf..dae8282 100644 --- a/src/horizontal/lib_divrot.hpp +++ b/src/horizontal/lib_divrot.hpp @@ -36,31 +36,28 @@ #define ICONMATH_DECLARE_RECON_LSQ_CELL_Q(_type) \ void recon_lsq_cell_q( \ const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ - const _type *lsq_rmat_rdiag_c, const _type *lsq_rmat_utri_c, \ - const _type *lsq_moments, const _type *lsq_qtmat_c, _type *p_coeff, \ + const _type *lsq_qtmat_c, const _type *lsq_rmat_rdiag_c, \ + const _type *lsq_rmat_utri_c, const _type *lsq_moments, _type *p_coeff, \ int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ - int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, \ - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, \ - int lsq_dim_c) + int slev, int elev, int nproma, int patch_id, bool l_limited_area, \ + 
bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) #define ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD(_type) \ void recon_lsq_cell_q_svd( \ const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ const _type *lsq_pseudoinv, const _type *lsq_moments, _type *p_coeff, \ int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ - int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, \ - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, \ - int lsq_dim_c) + int slev, int elev, int nproma, int patch_id, bool l_limited_area, \ + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) #define ICONMATH_DECLARE_RECON_LSQ_CELL_C(_type) \ void recon_lsq_cell_c( \ const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ - const _type *lsq_rmat_rdiag_c, const _type *lsq_rmat_utri_c, \ - const _type *lsq_moments, const _type *lsq_qtmat_c, _type *p_coeff, \ + const _type *lsq_qtmat_c, const _type *lsq_rmat_rdiag_c, \ + const _type *lsq_rmat_utri_c, const _type *lsq_moments, _type *p_coeff, \ int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ - int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, \ - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, \ - int lsq_dim_c) + int slev, int elev, int nproma, int patch_id, bool l_limited_area, \ + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) #define ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD(_type) \ void recon_lsq_cell_c_svd( \ diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index a57f31a..37110db 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -134,10 +134,12 @@ protected: int i_startidx_in = 0; int i_endidx_in = nproma; // Full range: 0 .. nproma-1 int slev = 0; - int elev = nlev; // Full vertical range (0 .. nlev-1) - bool lacc = false; // Not using ACC-specific behavior. 
- bool acc_async = false; // No asynchronous execution. - bool l_consv = true; // No conservative correction + int elev = nlev; // Full vertical range (0 .. nlev-1) + int patch_id = 0; + bool lacc = false; // Not using ACC-specific behavior. + bool acc_async = false; // No asynchronous execution. + bool l_consv = true; // With conservative correction. + bool l_limited_area = true; // Limited area setup std::vector<ValueType> p_cc; std::vector<int> cell_neighbor_idx; @@ -150,14 +152,14 @@ protected: HorizontalDivrotTest() { p_cc.resize(dim_combine(nproma, nlev, nblks_c)); - cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, 3)); - cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, 3)); + cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, lsq_dim_c)); + cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, lsq_dim_c)); lsq_qtmat_c.resize(dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c)); lsq_rmat_rdiag_c.resize(dim_combine(nproma, lsq_dim_unk, nblks_c)); lsq_rmat_utri_c.resize(dim_combine( nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c)); lsq_moments.resize(dim_combine(nproma, nblks_c, lsq_dim_unk)); - p_coeff.resize(dim_combine(lsq_dim_c, nproma, nlev, nblks_c)); + p_coeff.resize(dim_combine(lsq_dim_unk + 1, nproma, nlev, nblks_c)); } }; @@ -201,9 +203,9 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_c, nproma, nlev, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; const auto &rmat_utri_at = at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; @@ -216,13 +218,12 @@ 
TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; - for (int j = 0; j < 3; ++j) { - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.5; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; } @@ -245,12 +246,15 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); // Check result - EXPECT_NEAR(this->p_coeff[(at<lsq_dim_c, nproma, nlev, nblks_c>(0, 0, 0, 0))], - 0.34, 1e-6); - EXPECT_NEAR(this->p_coeff[(at<lsq_dim_c, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 1.8, 1e-6); - EXPECT_NEAR(this->p_coeff[(at<lsq_dim_c, nproma, nlev, nblks_c>(2, 0, 0, 0))], - 1.0, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.34, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 1.0, 1e-6); } TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearRandom) { @@ -261,9 +265,9 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearRandom) { constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_c, nproma, nlev, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; const 
auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; const auto &rmat_utri_at = at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; @@ -278,14 +282,13 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearRandom) { for (int i = 0; i < nproma; ++i) { this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); - for (int j = 0; j < 3; ++j) { + for (int j = 0; j < lsq_dim_c; ++j) { this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - for (int j = 0; j < lsq_dim_c; ++j) { this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = real_distrib(gen); this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = real_distrib(gen); + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); } @@ -308,60 +311,50 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearRandom) { this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); // Compute reference result - std::vector<TypeParam> z_d(3); - std::vector<TypeParam> z_qt_times_d(2); - std::vector<TypeParam> p_result(lsq_dim_c * nproma); + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, this->i_endblk, i_startidx, i_endidx); for (int jk = this->slev; jk < this->elev; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { - z_d[0] = this->p_cc[p_cc_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; - z_d[1] = this->p_cc[p_cc_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; - z_d[2] = this->p_cc[p_cc_at( - 
this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; - z_qt_times_d[0] = this->lsq_qtmat_c[qtmat_at(jc, 0, 0, jb)] * z_d[0] + - this->lsq_qtmat_c[qtmat_at(jc, 0, 1, jb)] * z_d[1] + - this->lsq_qtmat_c[qtmat_at(jc, 0, 2, jb)] * z_d[2]; - z_qt_times_d[1] = this->lsq_qtmat_c[qtmat_at(jc, 1, 0, jb)] * z_d[0] + - this->lsq_qtmat_c[qtmat_at(jc, 1, 1, jb)] * z_d[1] + - this->lsq_qtmat_c[qtmat_at(jc, 1, 2, jb)] * z_d[2]; - p_result[at<lsq_dim_c, nproma>(2, jc)] = + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + z_qt_times_d[0] = 0.0; + z_qt_times_d[1] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + z_qt_times_d[0] += this->lsq_qtmat_c[qtmat_at(jc, 0, i, jb)] * z_d[i]; + z_qt_times_d[1] += this->lsq_qtmat_c[qtmat_at(jc, 1, i, jb)] * z_d[i]; + } + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] = this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 1, jb)] * z_qt_times_d[1]; - p_result[at<lsq_dim_c, nproma>(1, jc)] = + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] = this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 0, jb)] * - (z_qt_times_d[0] - this->lsq_rmat_utri_c[rmat_utri_at(jc, 0, jb)] * - p_result[at<lsq_dim_c, nproma>(2, jc)]); - p_result[at<lsq_dim_c, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)]; - } - } - for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - p_result[at<lsq_dim_c, nproma>(0, jc)] = - p_result[at<lsq_dim_c, nproma>(0, jc)] - - p_result[at<lsq_dim_c, nproma>(1, jc)] * + (z_qt_times_d[0] - + this->lsq_rmat_utri_c[rmat_utri_at(jc, 0, jb)] * + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)]); + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)] - + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] * 
this->lsq_moments[moments_at(jc, jb, 0)] - - p_result[at<lsq_dim_c, nproma>(2, jc)] * + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] * this->lsq_moments[moments_at(jc, jb, 1)]; } } } // Check result - for (int i = 0; i < lsq_dim_c; ++i) { + for (int i = 0; i < lsq_dim_unk + 1; ++i) { for (int jc = 0; jc < nproma; ++jc) { EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], - p_result[(at<lsq_dim_c, nproma>(i, jc))], 1e-6); + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; } } } @@ -373,6 +366,203 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, TestLsqDimensions) { EXPECT_EQ(TestFixture::lsq_dim_unk, 5); } +TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadratic) { + constexpr int nproma = TestFixture::nproma; + constexpr int nlev = TestFixture::nlev; + constexpr int nblks_c = TestFixture::nblks_c; + constexpr int lsq_dim_c = TestFixture::lsq_dim_c; + constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; + this->lsq_qtmat_c[qtmat_at(i, 1, 
j, 0)] = 0.5; + this->lsq_qtmat_c[qtmat_at(i, 2, j, 0)] = 0.2; + this->lsq_qtmat_c[qtmat_at(i, 3, j, 0)] = 0.7; + this->lsq_qtmat_c[qtmat_at(i, 4, j, 0)] = 1.3; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = 2.0; + } + + for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { + this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = 1.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + } + + // Test function + recon_lsq_cell_q<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.24, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 3.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + -2.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 2.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + -3.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 2.6, 1e-6); +} + +TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticRandom) { + 
constexpr int nproma = TestFixture::nproma; + constexpr int nlev = TestFixture::nlev; + constexpr int nblks_c = TestFixture::nblks_c; + constexpr int lsq_dim_c = TestFixture::lsq_dim_c; + constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + for (int k = 0; k < lsq_dim_c; ++k) { + this->lsq_qtmat_c[qtmat_at(i, j, k, 0)] = real_distrib(gen); + } + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = real_distrib(gen); + this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + } + for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { + this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = real_distrib(gen); + } + + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + } + + // Test function + recon_lsq_cell_q<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + 
this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + for (int j = 0; j < lsq_dim_unk; ++j) { + z_qt_times_d[j] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + z_qt_times_d[j] += + this->lsq_qtmat_c[qtmat_at(jc, j, i, jb)] * z_d[i]; + } + } + int utri_id = 0; + for (int j = lsq_dim_unk; j > 0; --j) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = z_qt_times_d[j - 1]; + for (int k = j + 1; k <= lsq_dim_unk; ++k) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] -= + this->lsq_rmat_utri_c[rmat_utri_at(jc, utri_id++, jb)] * + p_result[at<lsq_dim_unk + 1, nproma>(k, jc)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] *= + this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, j - 1, jb)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)]; + for (int j = 0; j < lsq_dim_unk; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= + p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * + this->lsq_moments[moments_at(jc, jb, j)]; + } + } + } + } + + // Check result + for (int i = 0; i < 
lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; + } + } +} + TYPED_TEST_SUITE(HorizontalDivrotCubicTest, ValueTypes); TYPED_TEST(HorizontalDivrotCubicTest, TestLsqDimensions) { -- GitLab From 8eccf45529b51dbda55e4fe3d9500d01d2a072bc Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Sun, 9 Mar 2025 11:14:13 +0100 Subject: [PATCH 57/76] Add tests --- test/c/test_horizontal_divrot.cpp | 217 ++++++++++++++++++++++++++++++ 1 file changed, 217 insertions(+) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 37110db..f61fc17 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -569,3 +569,220 @@ TYPED_TEST(HorizontalDivrotCubicTest, TestLsqDimensions) { EXPECT_EQ(TestFixture::lsq_dim_c, 9); EXPECT_EQ(TestFixture::lsq_dim_unk, 9); } + +TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubic) { + constexpr int nproma = TestFixture::nproma; + constexpr int nlev = TestFixture::nlev; + constexpr int nblks_c = TestFixture::nblks_c; + constexpr int lsq_dim_c = TestFixture::lsq_dim_c; + constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + 
this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; + this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.9; + this->lsq_qtmat_c[qtmat_at(i, 2, j, 0)] = 0.8; + this->lsq_qtmat_c[qtmat_at(i, 3, j, 0)] = 0.7; + this->lsq_qtmat_c[qtmat_at(i, 4, j, 0)] = 0.6; + this->lsq_qtmat_c[qtmat_at(i, 5, j, 0)] = 0.5; + this->lsq_qtmat_c[qtmat_at(i, 6, j, 0)] = 0.4; + this->lsq_qtmat_c[qtmat_at(i, 7, j, 0)] = 0.3; + this->lsq_qtmat_c[qtmat_at(i, 8, j, 0)] = 0.2; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = 2.0; + } + + for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { + this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = 1.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + this->lsq_moments[moments_at(i, 0, 5)] = 0.7; + this->lsq_moments[moments_at(i, 0, 6)] = 0.8; + this->lsq_moments[moments_at(i, 0, 7)] = 0.9; + this->lsq_moments[moments_at(i, 0, 8)] = 1.0; + } + + // Test function + recon_lsq_cell_c<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // 
Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.28, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + -0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + -0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], + -0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], + -0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], + 0.4, 1e-6); +} + +TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicRandom) { + constexpr int nproma = TestFixture::nproma; + constexpr int nlev = TestFixture::nlev; + constexpr int nblks_c = TestFixture::nblks_c; + constexpr int lsq_dim_c = TestFixture::lsq_dim_c; + constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + 
std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + for (int k = 0; k < lsq_dim_c; ++k) { + this->lsq_qtmat_c[qtmat_at(i, j, k, 0)] = real_distrib(gen); + } + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = real_distrib(gen); + this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + } + for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { + this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = real_distrib(gen); + } + + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + } + + // Test function + recon_lsq_cell_c<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = 
this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + for (int j = 0; j < lsq_dim_unk; ++j) { + z_qt_times_d[j] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + z_qt_times_d[j] += + this->lsq_qtmat_c[qtmat_at(jc, j, i, jb)] * z_d[i]; + } + } + int utri_id = 0; + for (int j = lsq_dim_unk; j > 0; --j) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = z_qt_times_d[j - 1]; + for (int k = j + 1; k <= lsq_dim_unk; ++k) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] -= + this->lsq_rmat_utri_c[rmat_utri_at(jc, utri_id++, jb)] * + p_result[at<lsq_dim_unk + 1, nproma>(k, jc)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] *= + this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, j - 1, jb)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)]; + for (int j = 0; j < lsq_dim_unk; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= + p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * + this->lsq_moments[moments_at(jc, jb, j)]; + } + } + } + } + + // Check result + for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; + } + } +} -- GitLab From e5a1879176f817b7d6af125f8442756500cf16a1 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Wed, 12 Mar 2025 15:54:21 +0100 Subject: [PATCH 58/76] Inner product using lambda functions --- src/horizontal/CMakeLists.txt | 4 +- src/horizontal/lib_divrot.cpp | 71 +++++++++++++---------------------- 2 files changed, 29 insertions(+), 46 deletions(-) diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt index f3b75c0..a09fdc2 100644 --- a/src/horizontal/CMakeLists.txt +++ b/src/horizontal/CMakeLists.txt @@ -22,7 +22,9 @@ 
set(Fortran_MODULE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/mod") set_target_properties( iconmath-horizontal PROPERTIES Fortran_MODULE_DIRECTORY "${Fortran_MODULE_DIRECTORY}" - EXPORT_NAME ${PROJECT_NAME}::horizontal) + EXPORT_NAME ${PROJECT_NAME}::horizontal + CXX_STANDARD 20 + CXX_STANDARD_REQUIRED ON) if(IM_ENABLE_LOOP_EXCHANGE) target_compile_definitions(iconmath-horizontal PRIVATE __LOOP_EXCHANGE) diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp index 164be38..49e72c3 100644 --- a/src/horizontal/lib_divrot.cpp +++ b/src/horizontal/lib_divrot.cpp @@ -15,6 +15,18 @@ #include <horizontal/lib_divrot.hpp> #include <support/mo_lib_loopindices.hpp> +#define DECLARE_LAMBDA_INNER_PRODUCT(_func_name, _output, _id, _lambda) \ + auto inner_product = [=, &_output](int _id, auto &&...ts) { \ + return [=, &_output] { \ + _output(_id) = 0.0; \ + int dummy[sizeof...(ts)]{(_lambda, 0)...}; \ + }; \ + }; \ + auto _func_name = [=]<int... Is>(int _id, \ + std::integer_sequence<int, Is...>) { \ + return inner_product(_id, Is...)(); \ + }; + template <typename T> void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, const int *cell_neighbor_blk, const T *lsq_qtmat_c, @@ -261,51 +273,20 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::parallel_for( "recon_lsq_cell_q_step2", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 1, 1, 
jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk); + auto lambda_add = [=, &z_qt_times_d](auto lsq_qtmat_c_view, auto z_d, + int jb, int jk, int jc, int unk, + int i) { + z_qt_times_d(unk) += + 
lsq_qtmat_c_view(jc, unk, i, jb) * z_d(i, jc, jk); + }; + DECLARE_LAMBDA_INNER_PRODUCT( + dot_product, z_qt_times_d, unk, + lambda_add(lsq_qtmat_c_view, z_d, jb, jk, jc, unk, ts)); + dot_product(0, std::make_integer_sequence<int, 9>()); + dot_product(1, std::make_integer_sequence<int, 9>()); + dot_product(2, std::make_integer_sequence<int, 9>()); + dot_product(3, std::make_integer_sequence<int, 9>()); + dot_product(4, std::make_integer_sequence<int, 9>()); p_coeff_view(5, jc, jk, jb) = ptr_rrdiag(jc, 4, jb) * z_qt_times_d(4); p_coeff_view(4, jc, jk, jb) = -- GitLab From 2e04e5a247b7e0ce1dad55be15a42ead7591307b Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Wed, 12 Mar 2025 15:55:43 +0100 Subject: [PATCH 59/76] Revert "Inner product using lambda functions" This reverts commit 1f6c95b342ea4c6e42388ccd112049e51502dff7. --- src/horizontal/CMakeLists.txt | 4 +- src/horizontal/lib_divrot.cpp | 71 ++++++++++++++++++++++------------- 2 files changed, 46 insertions(+), 29 deletions(-) diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt index a09fdc2..f3b75c0 100644 --- a/src/horizontal/CMakeLists.txt +++ b/src/horizontal/CMakeLists.txt @@ -22,9 +22,7 @@ set(Fortran_MODULE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/mod") set_target_properties( iconmath-horizontal PROPERTIES Fortran_MODULE_DIRECTORY "${Fortran_MODULE_DIRECTORY}" - EXPORT_NAME ${PROJECT_NAME}::horizontal - CXX_STANDARD 20 - CXX_STANDARD_REQUIRED ON) + EXPORT_NAME ${PROJECT_NAME}::horizontal) if(IM_ENABLE_LOOP_EXCHANGE) target_compile_definitions(iconmath-horizontal PRIVATE __LOOP_EXCHANGE) diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp index 49e72c3..164be38 100644 --- a/src/horizontal/lib_divrot.cpp +++ b/src/horizontal/lib_divrot.cpp @@ -15,18 +15,6 @@ #include <horizontal/lib_divrot.hpp> #include <support/mo_lib_loopindices.hpp> -#define DECLARE_LAMBDA_INNER_PRODUCT(_func_name, _output, _id, _lambda) \ - auto inner_product = [=, 
&_output](int _id, auto &&...ts) { \ - return [=, &_output] { \ - _output(_id) = 0.0; \ - int dummy[sizeof...(ts)]{(_lambda, 0)...}; \ - }; \ - }; \ - auto _func_name = [=]<int... Is>(int _id, \ - std::integer_sequence<int, Is...>) { \ - return inner_product(_id, Is...)(); \ - }; - template <typename T> void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, const int *cell_neighbor_blk, const T *lsq_qtmat_c, @@ -273,20 +261,51 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::parallel_for( "recon_lsq_cell_q_step2", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - auto lambda_add = [=, &z_qt_times_d](auto lsq_qtmat_c_view, auto z_d, - int jb, int jk, int jc, int unk, - int i) { - z_qt_times_d(unk) += - lsq_qtmat_c_view(jc, unk, i, jb) * z_d(i, jc, jk); - }; - DECLARE_LAMBDA_INNER_PRODUCT( - dot_product, z_qt_times_d, unk, - lambda_add(lsq_qtmat_c_view, z_d, jb, jk, jc, unk, ts)); - dot_product(0, std::make_integer_sequence<int, 9>()); - dot_product(1, std::make_integer_sequence<int, 9>()); - dot_product(2, std::make_integer_sequence<int, 9>()); - dot_product(3, std::make_integer_sequence<int, 9>()); - dot_product(4, std::make_integer_sequence<int, 9>()); + z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) + 
+ lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk); p_coeff_view(5, jc, jk, jb) = ptr_rrdiag(jc, 4, jb) * z_qt_times_d(4); p_coeff_view(4, jc, jk, jb) = -- GitLab From 0dad7998ffd585b68375d682e5b64af592ddfb2f Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 14 Mar 2025 21:22:35 +0100 Subject: [PATCH 60/76] replaced TestFixture:: with this-> --- 
test/c/test_horizontal_divrot.cpp | 60 +++++++++++++++---------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index f61fc17..5e7bb25 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -196,11 +196,11 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestLsqDimensions) { } TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { - constexpr int nproma = TestFixture::nproma; - constexpr int nlev = TestFixture::nlev; - constexpr int nblks_c = TestFixture::nblks_c; - constexpr int lsq_dim_c = TestFixture::lsq_dim_c; - constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; const auto &p_cc_at = at<nproma, nlev, nblks_c>; const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; @@ -258,11 +258,11 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { } TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearRandom) { - constexpr int nproma = TestFixture::nproma; - constexpr int nlev = TestFixture::nlev; - constexpr int nblks_c = TestFixture::nblks_c; - constexpr int lsq_dim_c = TestFixture::lsq_dim_c; - constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; const auto &p_cc_at = at<nproma, nlev, nblks_c>; const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; @@ -367,11 +367,11 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, TestLsqDimensions) { } TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadratic) { - constexpr int nproma = TestFixture::nproma; - constexpr int nlev = TestFixture::nlev; 
- constexpr int nblks_c = TestFixture::nblks_c; - constexpr int lsq_dim_c = TestFixture::lsq_dim_c; - constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; const auto &p_cc_at = at<nproma, nlev, nblks_c>; const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; @@ -452,11 +452,11 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadratic) { } TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticRandom) { - constexpr int nproma = TestFixture::nproma; - constexpr int nlev = TestFixture::nlev; - constexpr int nblks_c = TestFixture::nblks_c; - constexpr int lsq_dim_c = TestFixture::lsq_dim_c; - constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; const auto &p_cc_at = at<nproma, nlev, nblks_c>; const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; @@ -571,11 +571,11 @@ TYPED_TEST(HorizontalDivrotCubicTest, TestLsqDimensions) { } TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubic) { - constexpr int nproma = TestFixture::nproma; - constexpr int nlev = TestFixture::nlev; - constexpr int nblks_c = TestFixture::nblks_c; - constexpr int lsq_dim_c = TestFixture::lsq_dim_c; - constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; const auto &p_cc_at = at<nproma, nlev, nblks_c>; const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; @@ -676,11 +676,11 @@ TYPED_TEST(HorizontalDivrotCubicTest, 
TestReconLsqCellCubic) { } TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicRandom) { - constexpr int nproma = TestFixture::nproma; - constexpr int nlev = TestFixture::nlev; - constexpr int nblks_c = TestFixture::nblks_c; - constexpr int lsq_dim_c = TestFixture::lsq_dim_c; - constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; const auto &p_cc_at = at<nproma, nlev, nblks_c>; const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; -- GitLab From d981ffc2d9320e805722a337dc1ec89e8c34d816 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 14 Mar 2025 21:33:27 +0100 Subject: [PATCH 61/76] removed an unused argument to one of the function in mo_lib_divrot --- src/horizontal/lib_divrot.cpp | 2 +- src/horizontal/lib_divrot.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp index 164be38..a24981d 100644 --- a/src/horizontal/lib_divrot.cpp +++ b/src/horizontal/lib_divrot.cpp @@ -733,7 +733,7 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, const T *lsq_moments, T *p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, - int lsq_high_set_dim_c, bool l_limited_area, + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) { // Wrap raw pointers in unmanaged Kokkos Views. 
diff --git a/src/horizontal/lib_divrot.hpp b/src/horizontal/lib_divrot.hpp index dae8282..b8e9743 100644 --- a/src/horizontal/lib_divrot.hpp +++ b/src/horizontal/lib_divrot.hpp @@ -64,7 +64,7 @@ const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ const _type *lsq_pseudoinv, const _type *lsq_moments, _type *p_coeff, \ int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ - int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, \ + int slev, int elev, int nproma, int patch_id, \ bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, \ int lsq_dim_c) -- GitLab From f07777c04814b1c9842e1fafbee02264c396e281 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 14 Mar 2025 21:34:07 +0100 Subject: [PATCH 62/76] added unit tests for all the svd functions --- test/c/test_horizontal_divrot.cpp | 508 ++++++++++++++++++++++++++++++ 1 file changed, 508 insertions(+) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 5e7bb25..11f98aa 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -148,6 +148,7 @@ protected: std::vector<ValueType> lsq_rmat_rdiag_c; std::vector<ValueType> lsq_rmat_utri_c; std::vector<ValueType> lsq_moments; + std::vector<ValueType> lsq_pseudoinv; std::vector<ValueType> p_coeff; HorizontalDivrotTest() { @@ -159,6 +160,7 @@ protected: lsq_rmat_utri_c.resize(dim_combine( nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c)); lsq_moments.resize(dim_combine(nproma, nblks_c, lsq_dim_unk)); + lsq_pseudoinv.resize(dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c)); p_coeff.resize(dim_combine(lsq_dim_unk + 1, nproma, nlev, nblks_c)); } }; @@ -257,6 +259,64 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { 1.0, 1e-6); } +TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearSVD) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int 
nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + // this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = 2.0; + // this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = 2.0; + // this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = 0.1; + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + } + + // Test function + recon_lsq_cell_l_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.65, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.0, 1e-6); 
+ EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 0.5, 1e-6); +} + TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; @@ -359,6 +419,95 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearRandom) { } } +TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearSVDRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = real_distrib(gen); + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = real_distrib(gen); + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + + this->lsq_moments[moments_at(i, 0, 0)] = real_distrib(gen); + this->lsq_moments[moments_at(i, 0, 1)] = real_distrib(gen); + } + + // Test function + recon_lsq_cell_l_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + 
this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] = + this->lsq_pseudoinv[pseudoinv_at(jc, 1, 0, jb)] * z_d[0] + + this->lsq_pseudoinv[pseudoinv_at(jc, 1, 1, jb)] * z_d[1] + + this->lsq_pseudoinv[pseudoinv_at(jc, 1, 2, jb)] * z_d[2]; + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] = + this->lsq_pseudoinv[pseudoinv_at(jc, 0, 0, jb)] * z_d[0] + + this->lsq_pseudoinv[pseudoinv_at(jc, 0, 1, jb)] * z_d[1] + + this->lsq_pseudoinv[pseudoinv_at(jc, 0, 2, jb)] * z_d[2]; + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)] - + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] * + this->lsq_moments[moments_at(jc, jb, 0)] - + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] * + this->lsq_moments[moments_at(jc, jb, 1)]; + } + } + } + + // Check result + for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result 
fails for i = " << i << ", jc = " << jc; + } + } +} + TYPED_TEST_SUITE(HorizontalDivrotQuadraticTest, ValueTypes); TYPED_TEST(HorizontalDivrotQuadraticTest, TestLsqDimensions) { @@ -451,6 +600,79 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadratic) { 2.6, 1e-6); } +TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticSVD) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; + this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.2; + this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; + this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 1.3; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + } + + // Test function + 
recon_lsq_cell_q_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + -0.56, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.0, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 0.5, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + 0.7, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 1.3, 1e-6); +} + TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; @@ -563,6 +785,104 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticRandom) { } } +TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticSVDRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, 
(lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); + + // Initialization is done only for iblk = 0 and ilev = 0 + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + for (int k = 0; k < lsq_dim_c; ++k) { + this->lsq_pseudoinv[pseudoinv_at(i, j, k, 0)] = real_distrib(gen); + } + this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + } + + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + } + + // Test function + recon_lsq_cell_q_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + // for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + {int jb = 0; + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + // for (int jk = this->slev; jk < this->elev; ++jk) { + {int jk = 0; + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < 
lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + for (int j = 1; j < lsq_dim_unk + 1; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] += + this->lsq_pseudoinv[pseudoinv_at(jc, j-1, i, jb)] * z_d[i]; + } + } + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)]; + for (int j = 0; j < lsq_dim_unk; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= + p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * + this->lsq_moments[moments_at(jc, jb, j)]; + } + } + } + } + + // Check result + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(j, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(j, jc))], 1e-5) + << "For loop result fails for j = " << j << ", jc = " << jc; + } + } +} + TYPED_TEST_SUITE(HorizontalDivrotCubicTest, ValueTypes); TYPED_TEST(HorizontalDivrotCubicTest, TestLsqDimensions) { @@ -675,6 +995,99 @@ TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubic) { 0.4, 1e-6); } +TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicSVD) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + 
this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.9; + this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.8; + this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; + this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 0.6; + this->lsq_pseudoinv[pseudoinv_at(i, 5, j, 0)] = 0.5; + this->lsq_pseudoinv[pseudoinv_at(i, 6, j, 0)] = 0.4; + this->lsq_pseudoinv[pseudoinv_at(i, 7, j, 0)] = 0.3; + this->lsq_pseudoinv[pseudoinv_at(i, 8, j, 0)] = 0.2; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + this->lsq_moments[moments_at(i, 0, 5)] = 0.7; + this->lsq_moments[moments_at(i, 0, 6)] = 0.8; + this->lsq_moments[moments_at(i, 0, 7)] = 0.9; + this->lsq_moments[moments_at(i, 0, 8)] = 1.0; + } + + // Test function + recon_lsq_cell_c_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + -1.64, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, 
nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.0, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 0.9, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 0.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + 0.7, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 0.6, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], + 0.5, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], + 0.3, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], + 0.2, 1e-6); +} + TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; @@ -786,3 +1199,98 @@ TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicRandom) { } } } + +TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicSVDRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + 
this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + for (int k = 0; k < lsq_dim_c; ++k) { + this->lsq_pseudoinv[pseudoinv_at(i, j, k, 0)] = real_distrib(gen); + } + this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + } + + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + } + + // Test function + recon_lsq_cell_c_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + + // for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + {int jb = 0; + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + // for (int jk = this->slev; jk < this->elev; ++jk) { + {int jk = 0; + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + for (int j = 1; j < lsq_dim_unk + 1; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] += + 
this->lsq_pseudoinv[pseudoinv_at(jc, j-1, i, jb)] * z_d[i]; + } + } + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)]; + for (int j = 0; j < lsq_dim_unk; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= + p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * + this->lsq_moments[moments_at(jc, jb, j)]; + } + } + } + } + // Check result + for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; + } + } +} -- GitLab From 9a853c01c77ecabb0a52bf9389028721f6680e59 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 14 Mar 2025 21:34:53 +0100 Subject: [PATCH 63/76] removed all TestLsqDimensions --- test/c/test_horizontal_divrot.cpp | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 11f98aa..6d0b3c6 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -192,11 +192,6 @@ typedef ::testing::Types<float, double> ValueTypes; TYPED_TEST_SUITE(HorizontalDivrotLinearTest, ValueTypes); -TYPED_TEST(HorizontalDivrotLinearTest, TestLsqDimensions) { - EXPECT_EQ(TestFixture::lsq_dim_c, 3); - EXPECT_EQ(TestFixture::lsq_dim_unk, 2); -} - TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; @@ -510,11 +505,6 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearSVDRandom) { TYPED_TEST_SUITE(HorizontalDivrotQuadraticTest, ValueTypes); -TYPED_TEST(HorizontalDivrotQuadraticTest, TestLsqDimensions) { - EXPECT_EQ(TestFixture::lsq_dim_c, 9); - EXPECT_EQ(TestFixture::lsq_dim_unk, 5); -} - TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadratic) { constexpr int nproma = this->nproma; constexpr int nlev = 
this->nlev; @@ -885,11 +875,6 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticSVDRandom) { TYPED_TEST_SUITE(HorizontalDivrotCubicTest, ValueTypes); -TYPED_TEST(HorizontalDivrotCubicTest, TestLsqDimensions) { - EXPECT_EQ(TestFixture::lsq_dim_c, 9); - EXPECT_EQ(TestFixture::lsq_dim_unk, 9); -} - TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubic) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; -- GitLab From 6565ce459c0a39c6c5660a85d1b8d8b4a4463f35 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Sun, 16 Mar 2025 08:35:04 +0100 Subject: [PATCH 64/76] renamed the unit test to remove redundant parts --- test/c/test_horizontal_divrot.cpp | 46 +++++++++++++++---------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 6d0b3c6..4ef6b7d 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -104,7 +104,7 @@ enum class ReconstructionMethod { /// Base test class for the horizontal divrot tests. Templated for the ValueType /// and ReconMethod for the reconstruction method. template <typename ValueType, int ReconMethod> -class HorizontalDivrotTest : public ::testing::Test { +class HorizontalReconTest : public ::testing::Test { protected: // lsq_dim_c and lsq_dim_unk are instantiated in compile time. static constexpr std::tuple<int, int> @@ -151,7 +151,7 @@ protected: std::vector<ValueType> lsq_pseudoinv; std::vector<ValueType> p_coeff; - HorizontalDivrotTest() { + HorizontalReconTest() { p_cc.resize(dim_combine(nproma, nlev, nblks_c)); cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, lsq_dim_c)); cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, lsq_dim_c)); @@ -168,31 +168,31 @@ protected: /// Test class for the horizontal tests. The reconstruction method is specified /// to linear. 
template <typename ValueType> -class HorizontalDivrotLinearTest - : public HorizontalDivrotTest< +class HorizontalReconLinearTest + : public HorizontalReconTest< ValueType, static_cast<int>(ReconstructionMethod::linear)> {}; /// Test class for the horizontal tests. The reconstruction method is specified /// to quadratic. template <typename ValueType> -class HorizontalDivrotQuadraticTest - : public HorizontalDivrotTest< +class HorizontalReconQuadraticTest + : public HorizontalReconTest< ValueType, static_cast<int>(ReconstructionMethod::quadratic)> {}; /// Test class for the horizontal tests. The reconstruction method is specified /// to cubic. template <typename ValueType> -class HorizontalDivrotCubicTest - : public HorizontalDivrotTest<ValueType, static_cast<int>( +class HorizontalReconCubicTest + : public HorizontalReconTest<ValueType, static_cast<int>( ReconstructionMethod::cubic)> { }; /// ValueTypes which the divrot tests should run with typedef ::testing::Types<float, double> ValueTypes; -TYPED_TEST_SUITE(HorizontalDivrotLinearTest, ValueTypes); +TYPED_TEST_SUITE(HorizontalReconLinearTest, ValueTypes); -TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { +TYPED_TEST(HorizontalReconLinearTest, TestLsqCell) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -254,7 +254,7 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { 1.0, 1e-6); } -TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearSVD) { +TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVD) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -312,7 +312,7 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearSVD) { 0.5, 1e-6); } -TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearRandom) { +TYPED_TEST(HorizontalReconLinearTest, TestLsqCellRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; 
constexpr int nblks_c = this->nblks_c; @@ -414,7 +414,7 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearRandom) { } } -TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearSVDRandom) { +TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVDRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -503,9 +503,9 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearSVDRandom) { } } -TYPED_TEST_SUITE(HorizontalDivrotQuadraticTest, ValueTypes); +TYPED_TEST_SUITE(HorizontalReconQuadraticTest, ValueTypes); -TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadratic) { +TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCell) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -590,7 +590,7 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadratic) { 2.6, 1e-6); } -TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticSVD) { +TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVD) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -663,7 +663,7 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticSVD) { 1.3, 1e-6); } -TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticRandom) { +TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -775,7 +775,7 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticRandom) { } } -TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticSVDRandom) { +TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -873,9 +873,9 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, 
TestReconLsqCellQuadraticSVDRandom) { } } -TYPED_TEST_SUITE(HorizontalDivrotCubicTest, ValueTypes); +TYPED_TEST_SUITE(HorizontalReconCubicTest, ValueTypes); -TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubic) { +TYPED_TEST(HorizontalReconCubicTest, TestLsqCell) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -980,7 +980,7 @@ TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubic) { 0.4, 1e-6); } -TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicSVD) { +TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVD) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -1073,7 +1073,7 @@ TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicSVD) { 0.2, 1e-6); } -TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicRandom) { +TYPED_TEST(HorizontalReconCubicTest, TestLsqCellRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -1185,7 +1185,7 @@ TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicRandom) { } } -TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicSVDRandom) { +TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; -- GitLab From 93b46bc51e3ee90c4e4f9a92176fb92d69d0fe63 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Sun, 16 Mar 2025 15:29:23 +0100 Subject: [PATCH 65/76] reordered the unit tests --- test/c/test_horizontal_divrot.cpp | 448 +++++++++++++++--------------- 1 file changed, 224 insertions(+), 224 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 4ef6b7d..60f0641 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -254,64 +254,6 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCell) { 1.0, 
1e-6); } -TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVD) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; - for (int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; - this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; - } - - // this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = 2.0; - // this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = 2.0; - // this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = 0.1; - - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - } - - // Test function - recon_lsq_cell_l_svd<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, - this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); - - // Check result - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, 
nproma, nlev, nblks_c>(0, 0, 0, 0))], - 0.65, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 1.0, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - 0.5, 1e-6); -} - TYPED_TEST(HorizontalReconLinearTest, TestLsqCellRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; @@ -414,6 +356,64 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellRandom) { } } +TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVD) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + // this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = 2.0; + // this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = 2.0; + // this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = 0.1; + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + } + + // Test function + 
recon_lsq_cell_l_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.65, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.0, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 0.5, 1e-6); +} + TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVDRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; @@ -590,79 +590,6 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCell) { 2.6, 1e-6); } -TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVD) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; - for (int j = 1; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - for (int j = 0; j < 
lsq_dim_c; ++j) { - this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; - this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; - this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.2; - this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; - this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 1.3; - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; - } - - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - this->lsq_moments[moments_at(i, 0, 2)] = 0.4; - this->lsq_moments[moments_at(i, 0, 3)] = 0.5; - this->lsq_moments[moments_at(i, 0, 4)] = 0.6; - } - - // Test function - recon_lsq_cell_q_svd<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->patch_id, this->l_limited_area, - this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, - this->lsq_dim_c); - - // Check result - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - -0.56, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 1.0, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - 0.5, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], - 0.2, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], - 0.7, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], - 1.3, 1e-6); -} - TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; @@ -775,6 +702,79 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { } } 
+TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVD) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; + this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.2; + this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; + this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 1.3; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + } + + // Test function + recon_lsq_cell_q_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, 
this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + -0.56, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.0, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 0.5, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + 0.7, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 1.3, 1e-6); +} + TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; @@ -980,99 +980,6 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCell) { 0.4, 1e-6); } -TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVD) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; - for (int j = 1; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - for (int j = 0; j < 
lsq_dim_c; ++j) { - this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; - this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.9; - this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.8; - this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; - this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 0.6; - this->lsq_pseudoinv[pseudoinv_at(i, 5, j, 0)] = 0.5; - this->lsq_pseudoinv[pseudoinv_at(i, 6, j, 0)] = 0.4; - this->lsq_pseudoinv[pseudoinv_at(i, 7, j, 0)] = 0.3; - this->lsq_pseudoinv[pseudoinv_at(i, 8, j, 0)] = 0.2; - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; - } - - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - this->lsq_moments[moments_at(i, 0, 2)] = 0.4; - this->lsq_moments[moments_at(i, 0, 3)] = 0.5; - this->lsq_moments[moments_at(i, 0, 4)] = 0.6; - this->lsq_moments[moments_at(i, 0, 5)] = 0.7; - this->lsq_moments[moments_at(i, 0, 6)] = 0.8; - this->lsq_moments[moments_at(i, 0, 7)] = 0.9; - this->lsq_moments[moments_at(i, 0, 8)] = 1.0; - } - - // Test function - recon_lsq_cell_c_svd<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->patch_id, this->l_limited_area, - this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, - this->lsq_dim_c); - - // Check result - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - -1.64, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 1.0, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - 0.9, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], - 0.8, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, 
nproma, nlev, nblks_c>(4, 0, 0, 0))], - 0.7, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], - 0.6, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], - 0.5, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], - 0.4, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], - 0.3, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], - 0.2, 1e-6); -} - TYPED_TEST(HorizontalReconCubicTest, TestLsqCellRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; @@ -1185,6 +1092,99 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellRandom) { } } +TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVD) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.9; + this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.8; + 
this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; + this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 0.6; + this->lsq_pseudoinv[pseudoinv_at(i, 5, j, 0)] = 0.5; + this->lsq_pseudoinv[pseudoinv_at(i, 6, j, 0)] = 0.4; + this->lsq_pseudoinv[pseudoinv_at(i, 7, j, 0)] = 0.3; + this->lsq_pseudoinv[pseudoinv_at(i, 8, j, 0)] = 0.2; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + this->lsq_moments[moments_at(i, 0, 5)] = 0.7; + this->lsq_moments[moments_at(i, 0, 6)] = 0.8; + this->lsq_moments[moments_at(i, 0, 7)] = 0.9; + this->lsq_moments[moments_at(i, 0, 8)] = 1.0; + } + + // Test function + recon_lsq_cell_c_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + -1.64, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.0, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 0.9, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 0.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + 0.7, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 0.6, 1e-6); + EXPECT_NEAR( + 
this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], + 0.5, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], + 0.3, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], + 0.2, 1e-6); +} + TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; -- GitLab From bb0ac8169cc60116cc12acd3b9ed46672bbc7302 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Sun, 16 Mar 2025 22:18:39 +0100 Subject: [PATCH 66/76] added the unit test for div3d --- test/c/test_horizontal_divrot.cpp | 183 ++++++++++++++++++++++++++++++ 1 file changed, 183 insertions(+) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 60f0641..078e753 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -1279,3 +1279,186 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { } } } + +template <typename ValueType> +class HorizontalDivTest : public ::testing::Test { +protected: + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 2; // number of vertical levels + static constexpr int nblks_c = 1; // number of cell blocks + static constexpr int nblks_e = 1; // number of edge blocks + static constexpr int dim4d = 2; // 4th dimension size + + int i_startblk = 0; + int i_endblk = nblks_c; // Test blocks [0 .. nblks_c-1] + int i_startidx_in = 0; + int i_endidx_in = nproma; // Full range: 0 .. nproma-1 + std::vector<int> slev; + std::vector<int> elev; + bool lacc = false; // Not using ACC-specific behavior. 
+ + std::vector<ValueType> vec_e; + std::vector<int> cell_edge_idx; + std::vector<int> cell_edge_blk; + std::vector<ValueType> geofac_div; + std::vector<ValueType> div_vec_c; + std::vector<ValueType> f4din; + std::vector<ValueType> f4dout; + + HorizontalDivTest() { + slev.resize(dim4d, 0); + elev.resize(dim4d, nlev); // Full vertical range (0 .. nlev-1) + + vec_e.resize(dim_combine(nproma, nlev, nblks_e)); + cell_edge_idx.resize(dim_combine(nproma, nblks_c, 3)); + cell_edge_blk.resize(dim_combine(nproma, nblks_c, 3)); + geofac_div.resize(dim_combine(nproma, 3, nblks_c)); + div_vec_c.resize(dim_combine(nproma, nlev, nblks_c)); + f4din.resize(dim_combine(nproma, nlev, nblks_e, dim4d)); + f4dout.resize(dim_combine(nproma, nlev, nblks_c, dim4d)); + } +}; + +template <typename ValueType> +class HorizontalDiv3DTest + : public HorizontalDivTest<ValueType> {}; +TYPED_TEST_SUITE(HorizontalDiv3DTest, ValueTypes); + +TYPED_TEST(HorizontalDiv3DTest, TestSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Initialization with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + } + + // Set edge indices to point to specific cells (including self) + this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; + this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + + // All edges are in the same block for this test + for (int j = 0; j < 3; ++j) { + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + } + + // Geometric factors + this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; + 
this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; + this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + + // Initialize div_vec_c to zero + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; + } + } + + // Call the div3d function + div3d<TypeParam>( + this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.1, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.2, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6); + +} + +TYPED_TEST(HorizontalDiv3DTest, TestRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialization with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; // Keep in 
same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + // Initialize div_vec_c to random values + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + } + } + + // Call the div3d function + div3d<TypeParam>( + this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values separately and verify results + std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "Results differ at i=" << i << ", k=" << k; + } + } +} -- GitLab From 
2942cdea1fabd8796aa8d5936554e901b8a78c8f Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Sun, 16 Mar 2025 22:22:20 +0100 Subject: [PATCH 67/76] added rest of the unit tests for div3d, div4d and divavg removed some redundant things --- test/c/test_horizontal_divrot.cpp | 850 ++++++++++++++++++++++++++++++ 1 file changed, 850 insertions(+) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 078e753..2ad95d2 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -1305,6 +1305,13 @@ protected: std::vector<ValueType> f4din; std::vector<ValueType> f4dout; + // Followings are needed in HorizontalDivAvgTest + std::vector<int> cell_neighbor_idx; + std::vector<int> cell_neighbor_blk; + std::vector<ValueType> avg_coeff; + std::vector<ValueType> opt_in2; + std::vector<ValueType> opt_out2; + HorizontalDivTest() { slev.resize(dim4d, 0); elev.resize(dim4d, nlev); // Full vertical range (0 .. nlev-1) @@ -1316,12 +1323,30 @@ protected: div_vec_c.resize(dim_combine(nproma, nlev, nblks_c)); f4din.resize(dim_combine(nproma, nlev, nblks_e, dim4d)); f4dout.resize(dim_combine(nproma, nlev, nblks_c, dim4d)); + cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, 3)); + cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, 3)); + avg_coeff.resize(dim_combine(nproma, 4, nblks_c)); + opt_in2.resize(dim_combine(nproma, nlev, nblks_e)); + opt_out2.resize(dim_combine(nproma, nlev, nblks_c)); } }; template <typename ValueType> class HorizontalDiv3DTest : public HorizontalDivTest<ValueType> {}; + +template <typename ValueType> +class HorizontalDiv3D2FTest + : public HorizontalDivTest<ValueType> {}; + +template <typename ValueType> +class HorizontalDiv4DTest + : public HorizontalDivTest<ValueType> {}; + +template <typename ValueType> +class HorizontalDivAvgTest + : public HorizontalDivTest<ValueType> {}; + TYPED_TEST_SUITE(HorizontalDiv3DTest, ValueTypes); TYPED_TEST(HorizontalDiv3DTest, TestSpecific) 
{ @@ -1462,3 +1487,828 @@ TYPED_TEST(HorizontalDiv3DTest, TestRandom) { } } } + +TYPED_TEST_SUITE(HorizontalDiv3D2FTest, ValueTypes); + +TYPED_TEST(HorizontalDiv3D2FTest, TestSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + const auto &f4d_at = at<nproma, nlev, nblks_e, dim4d>; + const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + + // Initialization with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + this->f4din[f4d_at(i, k, 0, 0)] = (i + 1) * (k + 2); // Different pattern for second field + } + + // Set edge indices to point to specific cells (including self) + this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; + this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + + // All edges are in the same block for this test + for (int j = 0; j < 3; ++j) { + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + } + + // Geometric factors + this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; + this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; + this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + + // Initialize div_vec_c and f4dout to zero + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; + this->f4dout[f4dout_at(i, k, 0, 0)] = 0.0; + } + } + + // Call the div3d_2field function + div3d_2field<TypeParam>( + this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->f4din.data(), this->f4dout.data(), + this->i_startblk, 
this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Check first field (same as in div3d test) + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.1, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.2, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6); + + // Check second field (expected values calculated manually) + EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 0)], 5.1, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 0)], 6.3, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 0)], 4.4, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 0)], 6.6, 1e-6); +} + +TYPED_TEST(HorizontalDiv3D2FTest, TestRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + const auto &f4d_at = at<nproma, nlev, nblks_e, dim4d>; + const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialization with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 
0)] = real_distrib(gen); + this->f4din[f4d_at(i, k, 0, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + // Initialize div_vec_c and f4dout to random values + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + this->f4dout[f4dout_at(i, k, 0, 0)] = real_distrib(gen); + } + } + + // Call the div3d_2field function + div3d_2field<TypeParam>( + this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->f4din.data(), this->f4dout.data(), + this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values separately and verify results + std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); + std::vector<TypeParam> ref_f4dout(nproma * nlev * nblks_c * dim4d, 0.0); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + // Calculate reference value for first field + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 
1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + + // Calculate reference value for second field + ref_f4dout[f4dout_at(jc, jk, jb, 0)] = + this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)], 0)] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)], 0)] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)], 0)] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Verify results for first field + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "First field results differ at i=" << i << ", k=" << k; + } + } + + // Verify results for second field + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->f4dout[f4dout_at(i, k, 0, 0)], + ref_f4dout[f4dout_at(i, k, 0, 0)], 1e-5) + << "Second field results differ at i=" << i << ", k=" << k; + } + } +} + +TYPED_TEST_SUITE(HorizontalDiv4DTest, ValueTypes); + +TYPED_TEST(HorizontalDiv4DTest, TestSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &f4din_at = at<nproma, nlev, nblks_e, dim4d>; + const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + for (int j = 
0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = (i + j) % nproma; + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + this->geofac_div[geofac_div_at(i, j, 0)] = 0.1 * (j + 1); + } + + for (int k = 0; k < nlev; ++k) { + for (int d = 0; d < dim4d; ++d) { + this->f4din[f4din_at(i, k, 0, d)] = 1.0 + i + k + d; + this->f4dout[f4dout_at(i, k, 0, d)] = 0.0; + } + } + } + + // Test function + div4d<TypeParam>( + this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->geofac_div.data(), this->f4din.data(), this->f4dout.data(), + this->dim4d, this->i_startblk, this->i_endblk, this->i_startidx_in, + this->i_endidx_in, this->slev.data(), this->elev.data(), this->nproma, + this->lacc, this->nlev, this->nblks_c, this->nblks_e); + + EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 0)], 1.4, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 0)], 1.1, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 0)], 1.1, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 0)], 2.0, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 1)], 2.0, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 1)], 1.7, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 1)], 1.7, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 1)], 2.6, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 1)], 2.3, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 1)], 2.3, 1e-6); + +} + +TYPED_TEST(HorizontalDiv4DTest, TestDiv4dRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &f4din_at = at<nproma, nlev, nblks_e, dim4d>; + const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + + 
std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); + + // Initialize with random values + for (int i = 0; i < nproma; ++i) { + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + for (int k = 0; k < nlev; ++k) { + for (int d = 0; d < dim4d; ++d) { + this->f4din[f4din_at(i, k, 0, d)] = real_distrib(gen); + this->f4dout[f4dout_at(i, k, 0, d)] = 0.0; + } + } + } + + // Test function + div4d<TypeParam>( + this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->geofac_div.data(), this->f4din.data(), this->f4dout.data(), + this->dim4d, this->i_startblk, this->i_endblk, this->i_startidx_in, + this->i_endidx_in, this->slev.data(), this->elev.data(), this->nproma, + this->lacc, this->nlev, this->nblks_c, this->nblks_e); + + // Compute reference result and check + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int ji = 0; ji < dim4d; ++ji) { + for (int jk = this->slev[ji]; jk < this->elev[ji]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + TypeParam expected = 0.0; + for (int je = 0; je < 3; ++je) { + expected += this->f4din[f4din_at( + this->cell_edge_idx[cell_edge_at(jc, jb, je)], + jk, + this->cell_edge_blk[cell_edge_at(jc, jb, je)], + ji)] * this->geofac_div[geofac_div_at(jc, je, jb)]; + } + + EXPECT_NEAR(this->f4dout[f4dout_at(jc, jk, jb, ji)], expected, 1e-5) + << "Random test fails at jc=" << jc << ", jk=" << jk + << ", jb=" << jb << ", ji=" << ji; + } + } + } + } +} + +TYPED_TEST_SUITE(HorizontalDivAvgTest, ValueTypes); + +TYPED_TEST(HorizontalDivAvgTest, TestSpecific) { + 
constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Vectors for additional parameters + // Vectors for block and index ranges + std::vector<int> i_startblk_in(3, 0); + std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_startidx_in(3, 0); + std::vector<int> i_endidx_in(3, nproma); + + // Parameters for the test + int patch_id = 1; + bool l_limited_area = true; + bool l2fields = true; + + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + + // Initialize the vectors with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + this->opt_in2[vec_e_at(i, k, 0)] = (i + 1) * (k + 1) * 0.5; // Half of vec_e + } + + // Set edge indices to point to specific cells + this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; + this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + + // Set neighbor indices similarly + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = (i + 2) % nproma; + + // All edges and neighbors are in the same block for this test + for (int j = 0; j < 3; ++j) { + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + // Geometric factors + this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; + this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; + this->geofac_div[geofac_div_at(i, 2, 
0)] = 0.2; + + // Average coefficients + this->avg_coeff[avg_coeff_at(i, 0, 0)] = 0.4; // Self + this->avg_coeff[avg_coeff_at(i, 1, 0)] = 0.2; // First neighbor + this->avg_coeff[avg_coeff_at(i, 2, 0)] = 0.2; // Second neighbor + this->avg_coeff[avg_coeff_at(i, 3, 0)] = 0.2; // Third neighbor + + // Initialize div_vec_c and opt_out2 to zero + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; + this->opt_out2[div_vec_c_at(i, k, 0)] = 0.0; + } + } + + // Call the div_avg function + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), this->avg_coeff.data(), + this->div_vec_c.data(), this->opt_in2.data(), this->opt_out2.data(), + i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], + this->elev[0], this->nproma, patch_id, l_limited_area, + l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.76, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.04, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.08, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.08, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.16, 1e-6); + + EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 0, 0)], 0.94, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 1, 0)], 1.88, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 0, 0)], 1.02, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 2.04, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 1.04, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 2.08, 1e-6); + +} + +TYPED_TEST(HorizontalDivAvgTest, TestRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = 
this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Vectors for block and index ranges + std::vector<int> i_startblk_in(3, 0); + std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_startidx_in(3, 0); + std::vector<int> i_endidx_in(3, nproma); + + // Parameters for the test + int patch_id = 1; + bool l_limited_area = true; + bool l2fields = true; + + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialize with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + this->opt_in2[vec_e_at(i, k, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + // Random average coefficients + for (int j = 0; j < 4; ++j) { + this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen); + } + + // Random initial values for div_vec_c and opt_out2 + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + this->opt_out2[div_vec_c_at(i, k, 0)] = 
real_distrib(gen); + } + } + + // Call the div_avg function + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), this->avg_coeff.data(), + this->div_vec_c.data(), this->opt_in2.data(), this->opt_out2.data(), + i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], + this->elev[0], this->nproma, patch_id, l_limited_area, + l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values manually + std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> aux_c2(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> ref_opt_out2(dim_combine(nproma, nlev, nblks_c)); + + // Step 1: Calculate aux_c and aux_c2 + for (int jb = i_startblk_in[0]; jb < i_endblk_in[0]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + aux_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + + aux_c2[div_vec_c_at(jc, jk, jb)] = + this->opt_in2[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + 
this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->opt_in2[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->opt_in2[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Step 2: Assign aux_c to div_vec_c and aux_c2 to opt_out2 for patch_id > 0 + for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = aux_c[div_vec_c_at(jc, jk, jb)]; + ref_opt_out2[div_vec_c_at(jc, jk, jb)] = aux_c2[div_vec_c_at(jc, jk, jb)]; + } + } + } + + // Step 3: Perform averaging for the rest of the blocks + for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, + i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + aux_c[div_vec_c_at(jc, jk, jb)] * this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * + this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * + this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * + 
this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + + ref_opt_out2[div_vec_c_at(jc, jk, jb)] = + aux_c2[div_vec_c_at(jc, jk, jb)] * this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + aux_c2[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * + this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + aux_c2[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * + this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + aux_c2[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * + this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "div_vec_c results differ at i=" << i << ", k=" << k; + + EXPECT_NEAR(this->opt_out2[div_vec_c_at(i, k, 0)], + ref_opt_out2[div_vec_c_at(i, k, 0)], 1e-5) + << "opt_out2 results differ at i=" << i << ", k=" << k; + } + } +} + +TYPED_TEST(HorizontalDivAvgTest, TestSpecificNoL2fields) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Vectors for block and index ranges + std::vector<int> i_startblk_in(3, 0); + std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_startidx_in(3, 0); + std::vector<int> i_endidx_in(3, nproma); + + // Parameters for the test + int patch_id = 1; + bool l_limited_area = true; + bool l2fields = false; + + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + 
const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + + // Initialize the vectors with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + this->opt_in2[vec_e_at(i, k, 0)] = (i + 1) * (k + 1) * 0.5; // Half of vec_e + } + + // Set edge indices to point to specific cells + this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; + this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + + // Set neighbor indices similarly + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = (i + 2) % nproma; + + // All edges and neighbors are in the same block for this test + for (int j = 0; j < 3; ++j) { + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + // Geometric factors + this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; + this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; + this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + + // Average coefficients + this->avg_coeff[avg_coeff_at(i, 0, 0)] = 0.4; // Self + this->avg_coeff[avg_coeff_at(i, 1, 0)] = 0.2; // First neighbor + this->avg_coeff[avg_coeff_at(i, 2, 0)] = 0.2; // Second neighbor + this->avg_coeff[avg_coeff_at(i, 3, 0)] = 0.2; // Third neighbor + + // Initialize div_vec_c and opt_out2 to zero + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; + this->opt_out2[div_vec_c_at(i, k, 0)] = 0.0; + } + } + + // Call the div_avg function + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), this->avg_coeff.data(), + this->div_vec_c.data(), this->opt_in2.data(), this->opt_out2.data(), + 
i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], + this->elev[0], this->nproma, patch_id, l_limited_area, + l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.76, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.04, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.08, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.08, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.16, 1e-6); + + EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 0, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 1, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 0, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 0.0, 1e-6); + +} + +TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Vectors for block and index ranges + std::vector<int> i_startblk_in(3, 0); + std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_startidx_in(3, 0); + std::vector<int> i_endidx_in(3, nproma); + + // Parameters for the test + int patch_id = 1; + bool l_limited_area = true; + bool l2fields = false; // Set to false for this test + + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + + // Set up random number generators + std::random_device rd; + std::mt19937 
gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialize with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + this->opt_in2[vec_e_at(i, k, 0)] = real_distrib(gen); // Not used but initialize anyway + } + + // Set random edge indices + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + // Random average coefficients + for (int j = 0; j < 4; ++j) { + this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen); + } + + // Random initial values for div_vec_c and opt_out2 + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + this->opt_out2[div_vec_c_at(i, k, 0)] = real_distrib(gen); // Not used but initialize anyway + } + } + + // Call the div_avg function with l2fields=false + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), this->avg_coeff.data(), + this->div_vec_c.data(), this->opt_in2.data(), this->opt_out2.data(), + i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], + this->elev[0], this->nproma, patch_id, l_limited_area, + l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values manually + std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c)); + 
std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, nblks_c)); + + // Step 1: Calculate aux_c (but not aux_c2 since l2fields=false) + for (int jb = i_startblk_in[0]; jb < i_endblk_in[0]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + aux_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Step 2: Assign aux_c to div_vec_c for patch_id > 0 (opt_out2 not updated since l2fields=false) + for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = aux_c[div_vec_c_at(jc, jk, jb)]; + } + } + } + + // Step 3: Perform averaging for the rest of the blocks (only for div_vec_c, not opt_out2) + for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, + i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + 
ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + aux_c[div_vec_c_at(jc, jk, jb)] * this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * + this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * + this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * + this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + } + } + } + + // Verify results - only check div_vec_c since l2fields=false means opt_out2 isn't updated + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "div_vec_c results differ at i=" << i << ", k=" << k; + } + } +} -- GitLab From 5c78853a91f20238d2e782ce6ad98159f7d9993e Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 17 Mar 2025 11:13:48 +0100 Subject: [PATCH 68/76] added unit tests for rest of the functions --- test/c/test_horizontal_divrot.cpp | 352 ++++++++++++++++++++++++++++++ 1 file changed, 352 insertions(+) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 2ad95d2..8f95e6e 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -2312,3 +2312,355 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { } } } + +template <typename ValueType> +class HorizontalRotVertexTest : public ::testing::Test { +protected: + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 2; // number of vertical levels + static constexpr int nblks_e = 1; // number of edge blocks + static constexpr int nblks_v = 1; // number of vertex blocks + static constexpr 
int dim4d = 2; // 4th dimension size + + int i_startblk = 0; + int i_endblk = nblks_v; // Test blocks [0 .. nblks_v-1] + int i_startidx_in = 0; + int i_endidx_in = nproma; // Full range: 0 .. nproma-1 + std::vector<int> slev; + std::vector<int> elev; + bool lacc = false; // Not using ACC-specific behavior. + bool acc_async = false; // Not using ACC-specific behavior. + + std::vector<ValueType> vec_e; + std::vector<int> vert_edge_idx; + std::vector<int> vert_edge_blk; + std::vector<ValueType> geofac_rot; + std::vector<ValueType> rot_vec; + std::vector<ValueType> f4din; + std::vector<ValueType> f4dout; + + HorizontalRotVertexTest () { + slev.resize(dim4d, 0); + elev.resize(dim4d, nlev); // Full vertical range (0 .. nlev-1) + + vec_e.resize(dim_combine(nproma, nlev, nblks_e)); + vert_edge_idx.resize(dim_combine(nproma, nblks_v, 6)); + vert_edge_blk.resize(dim_combine(nproma, nblks_v, 6)); + geofac_rot.resize(dim_combine(nproma, 6, nblks_v)); + rot_vec.resize(dim_combine(nproma, nlev, nblks_v)); + f4din.resize(dim_combine(nproma, nlev, nblks_e, dim4d)); + f4dout.resize(dim_combine(nproma, nlev, nblks_v, dim4d)); + } +}; + +template <typename ValueType> +class HorizontalRotVertexAtmosTest + : public HorizontalRotVertexTest<ValueType> {}; + +template <typename ValueType> +class HorizontalRotVertexRITest + : public HorizontalRotVertexTest<ValueType> {}; + +TYPED_TEST_SUITE(HorizontalRotVertexAtmosTest, ValueTypes); + +TYPED_TEST(HorizontalRotVertexAtmosTest, TestSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &vert_edge_at = at<nproma, nblks_v, 6>; + const auto &geofac_rot_at = at<nproma, 6, nblks_v>; + const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + + // Initialization with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + 
this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + } + + // Set edge indices to point to specific edges + for (int j = 0; j < 6; ++j) { + this->vert_edge_idx[vert_edge_at(i, 0, j)] = (i + j) % nproma; + // All edges are in the same block for this test + this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; + } + + // Geometric factors for rotation + this->geofac_rot[geofac_rot_at(i, 0, 0)] = 0.3; + this->geofac_rot[geofac_rot_at(i, 1, 0)] = 0.2; + this->geofac_rot[geofac_rot_at(i, 2, 0)] = 0.1; + this->geofac_rot[geofac_rot_at(i, 3, 0)] = 0.2; + this->geofac_rot[geofac_rot_at(i, 4, 0)] = 0.1; + this->geofac_rot[geofac_rot_at(i, 5, 0)] = 0.1; + + // Initialize rot_vec to zero + for (int k = 0; k < nlev; ++k) { + this->rot_vec[rot_vec_at(i, k, 0)] = 0.0; + } + } + + // Call the rot_vertex_atmos function + rot_vertex_atmos<TypeParam>( + this->vec_e.data(), this->vert_edge_idx.data(), + this->vert_edge_blk.data(), this->geofac_rot.data(), + this->rot_vec.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_e, this->nblks_v); + + // Expected values based on the initialization pattern + EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 1, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 0, 0)], 2.1, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 1, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 0, 0)], 2.2, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 1, 0)], 4.4, 1e-6); +} + +TYPED_TEST(HorizontalRotVertexAtmosTest, TestRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &vert_edge_at = at<nproma, nblks_v, 6>; + const auto &geofac_rot_at = at<nproma, 6, nblks_v>; + const auto 
&rot_vec_at = at<nproma, nlev, nblks_v>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialization with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 6; ++j) { + this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen); + this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 6; ++j) { + this->geofac_rot[geofac_rot_at(i, j, 0)] = real_distrib(gen); + } + + // Initialize rot_vec to random values + for (int k = 0; k < nlev; ++k) { + this->rot_vec[rot_vec_at(i, k, 0)] = real_distrib(gen); + } + } + + // Call the rot_vertex_atmos function + rot_vertex_atmos<TypeParam>( + this->vec_e.data(), this->vert_edge_idx.data(), + this->vert_edge_blk.data(), this->geofac_rot.data(), + this->rot_vec.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_e, this->nblks_v); + + // Calculate reference values separately and verify results + std::vector<TypeParam> ref_rot_vec(nproma * nlev * nblks_v, 0.0); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jv = i_startidx; jv < i_endidx; ++jv) { + ref_rot_vec[rot_vec_at(jv, jk, jb)] = + this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * + this->geofac_rot[geofac_rot_at(jv, 0, jb)] + + 
this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * + this->geofac_rot[geofac_rot_at(jv, 1, jb)] + + this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * + this->geofac_rot[geofac_rot_at(jv, 2, jb)] + + this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * + this->geofac_rot[geofac_rot_at(jv, 3, jb)] + + this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * + this->geofac_rot[geofac_rot_at(jv, 4, jb)] + + this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * + this->geofac_rot[geofac_rot_at(jv, 5, jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->rot_vec[rot_vec_at(i, k, 0)], + ref_rot_vec[rot_vec_at(i, k, 0)], 1e-5) + << "Results differ at i=" << i << ", k=" << k; + } + } +} + +TYPED_TEST_SUITE(HorizontalRotVertexRITest, ValueTypes); + +TYPED_TEST(HorizontalRotVertexRITest, TestSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &vert_edge_at = at<nproma, nblks_v, 6>; + const auto &geofac_rot_at = at<nproma, 6, nblks_v>; + const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + + // Initialization with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + } + + // Set edge indices to point to specific edges + for (int j = 0; j < 6; ++j) { + this->vert_edge_idx[vert_edge_at(i, 0, j)] = (i + j) % nproma; + // All edges are in the same block for this test + 
this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; + } + + // Geometric factors for rotation + this->geofac_rot[geofac_rot_at(i, 0, 0)] = 0.3; + this->geofac_rot[geofac_rot_at(i, 1, 0)] = 0.2; + this->geofac_rot[geofac_rot_at(i, 2, 0)] = 0.1; + this->geofac_rot[geofac_rot_at(i, 3, 0)] = 0.2; + this->geofac_rot[geofac_rot_at(i, 4, 0)] = 0.1; + this->geofac_rot[geofac_rot_at(i, 5, 0)] = 0.1; + + // Initialize rot_vec to zero + for (int k = 0; k < nlev; ++k) { + this->rot_vec[rot_vec_at(i, k, 0)] = 0.0; + } + } + + // Call the rot_vertex_ri function + rot_vertex_ri<TypeParam>( + this->vec_e.data(), this->vert_edge_idx.data(), + this->vert_edge_blk.data(), this->geofac_rot.data(), + this->rot_vec.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->acc_async, + this->nlev, this->nblks_e, this->nblks_v); + + // Expected values based on the initialization pattern + EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 1, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 0, 0)], 2.1, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 1, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 0, 0)], 2.2, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 1, 0)], 4.4, 1e-6); +} + +TYPED_TEST(HorizontalRotVertexRITest, TestRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &vert_edge_at = at<nproma, nblks_v, 6>; + const auto &geofac_rot_at = at<nproma, 6, nblks_v>; + const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // 
Initialization with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 6; ++j) { + this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen); + this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 6; ++j) { + this->geofac_rot[geofac_rot_at(i, j, 0)] = real_distrib(gen); + } + + // Initialize rot_vec to random values + for (int k = 0; k < nlev; ++k) { + this->rot_vec[rot_vec_at(i, k, 0)] = real_distrib(gen); + } + } + + // Call the rot_vertex_ri function + rot_vertex_ri<TypeParam>( + this->vec_e.data(), this->vert_edge_idx.data(), + this->vert_edge_blk.data(), this->geofac_rot.data(), + this->rot_vec.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->acc_async, + this->nlev, this->nblks_e, this->nblks_v); + + // Ensure computation is complete for both modes + Kokkos::fence(); + + // Calculate reference values separately and verify results + std::vector<TypeParam> ref_rot_vec(nproma * nlev * nblks_v, 0.0); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jv = i_startidx; jv < i_endidx; ++jv) { + ref_rot_vec[rot_vec_at(jv, jk, jb)] = + this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * + this->geofac_rot[geofac_rot_at(jv, 0, jb)] + + this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * + this->geofac_rot[geofac_rot_at(jv, 1, jb)] + + 
this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * + this->geofac_rot[geofac_rot_at(jv, 2, jb)] + + this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * + this->geofac_rot[geofac_rot_at(jv, 3, jb)] + + this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * + this->geofac_rot[geofac_rot_at(jv, 4, jb)] + + this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * + this->geofac_rot[geofac_rot_at(jv, 5, jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->rot_vec[rot_vec_at(i, k, 0)], + ref_rot_vec[rot_vec_at(i, k, 0)], 1e-5) + << "Results differ at i=" << i << ", k=" << k << ")"; + } + } + +} -- GitLab From 66a6f843c3e2457fa22f2657bfdb3b2d28bce7d1 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 17 Mar 2025 11:16:18 +0100 Subject: [PATCH 69/76] applied clang-format to the test file --- test/c/test_horizontal_divrot.cpp | 706 ++++++++++++++++-------------- 1 file changed, 374 insertions(+), 332 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 8f95e6e..a97c1da 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -169,8 +169,9 @@ protected: /// to linear. template <typename ValueType> class HorizontalReconLinearTest - : public HorizontalReconTest< - ValueType, static_cast<int>(ReconstructionMethod::linear)> {}; + : public HorizontalReconTest<ValueType, static_cast<int>( + ReconstructionMethod::linear)> { +}; /// Test class for the horizontal tests. The reconstruction method is specified /// to quadratic. 
@@ -184,7 +185,7 @@ class HorizontalReconQuadraticTest template <typename ValueType> class HorizontalReconCubicTest : public HorizontalReconTest<ValueType, static_cast<int>( - ReconstructionMethod::cubic)> { + ReconstructionMethod::cubic)> { }; /// ValueTypes which the divrot tests should run with @@ -483,12 +484,12 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVDRandom) { this->lsq_pseudoinv[pseudoinv_at(jc, 0, 1, jb)] * z_d[1] + this->lsq_pseudoinv[pseudoinv_at(jc, 0, 2, jb)] * z_d[2]; p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)] - - p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] * - this->lsq_moments[moments_at(jc, jb, 0)] - - p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] * - this->lsq_moments[moments_at(jc, jb, 1)]; + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)] - + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] * + this->lsq_moments[moments_at(jc, jb, 0)] - + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] * + this->lsq_moments[moments_at(jc, jb, 1)]; } } } @@ -832,12 +833,14 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); // for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - {int jb = 0; + { + int jb = 0; int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, this->i_endblk, i_startidx, i_endidx); // for (int jk = this->slev; jk < this->elev; ++jk) { - {int jk = 0; + { + int jk = 0; for (int jc = i_startidx; jc < i_endidx; ++jc) { for (int i = 0; i < lsq_dim_c; ++i) { z_d[i] = this->p_cc[p_cc_at( @@ -849,7 +852,7 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0; for (int i = 0; i < lsq_dim_c; ++i) { p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] += - 
this->lsq_pseudoinv[pseudoinv_at(jc, j-1, i, jb)] * z_d[i]; + this->lsq_pseudoinv[pseudoinv_at(jc, j - 1, i, jb)] * z_d[i]; } } p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = @@ -1240,12 +1243,14 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); // for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - {int jb = 0; + { + int jb = 0; int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, this->i_endblk, i_startidx, i_endidx); // for (int jk = this->slev; jk < this->elev; ++jk) { - {int jk = 0; + { + int jk = 0; for (int jc = i_startidx; jc < i_endidx; ++jc) { for (int i = 0; i < lsq_dim_c; ++i) { z_d[i] = this->p_cc[p_cc_at( @@ -1257,7 +1262,7 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0; for (int i = 0; i < lsq_dim_c; ++i) { p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] += - this->lsq_pseudoinv[pseudoinv_at(jc, j-1, i, jb)] * z_d[i]; + this->lsq_pseudoinv[pseudoinv_at(jc, j - 1, i, jb)] * z_d[i]; } } p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = @@ -1280,14 +1285,13 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { } } -template <typename ValueType> -class HorizontalDivTest : public ::testing::Test { +template <typename ValueType> class HorizontalDivTest : public ::testing::Test { protected: static constexpr int nproma = 3; // inner loop length static constexpr int nlev = 2; // number of vertical levels static constexpr int nblks_c = 1; // number of cell blocks static constexpr int nblks_e = 1; // number of edge blocks - static constexpr int dim4d = 2; // 4th dimension size + static constexpr int dim4d = 2; // 4th dimension size int i_startblk = 0; int i_endblk = nblks_c; // Test blocks [0 .. 
nblks_c-1] @@ -1332,20 +1336,16 @@ protected: }; template <typename ValueType> -class HorizontalDiv3DTest - : public HorizontalDivTest<ValueType> {}; +class HorizontalDiv3DTest : public HorizontalDivTest<ValueType> {}; template <typename ValueType> -class HorizontalDiv3D2FTest - : public HorizontalDivTest<ValueType> {}; +class HorizontalDiv3D2FTest : public HorizontalDivTest<ValueType> {}; template <typename ValueType> -class HorizontalDiv4DTest - : public HorizontalDivTest<ValueType> {}; +class HorizontalDiv4DTest : public HorizontalDivTest<ValueType> {}; template <typename ValueType> -class HorizontalDivAvgTest - : public HorizontalDivTest<ValueType> {}; +class HorizontalDivAvgTest : public HorizontalDivTest<ValueType> {}; TYPED_TEST_SUITE(HorizontalDiv3DTest, ValueTypes); @@ -1388,13 +1388,12 @@ TYPED_TEST(HorizontalDiv3DTest, TestSpecific) { } // Call the div3d function - div3d<TypeParam>( - this->vec_e.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->div_vec_c.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); + div3d<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6); EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.4, 1e-6); @@ -1402,7 +1401,6 @@ TYPED_TEST(HorizontalDiv3DTest, TestSpecific) { EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.2, 1e-6); EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.2, 1e-6); EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6); - } TYPED_TEST(HorizontalDiv3DTest, TestRandom) { @@ -1431,7 
+1429,8 @@ TYPED_TEST(HorizontalDiv3DTest, TestRandom) { // Set random edge indices for (int j = 0; j < 3; ++j) { this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + this->cell_edge_blk[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity } // Random geometric factors @@ -1446,13 +1445,12 @@ TYPED_TEST(HorizontalDiv3DTest, TestRandom) { } // Call the div3d function - div3d<TypeParam>( - this->vec_e.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->div_vec_c.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); + div3d<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); // Calculate reference values separately and verify results std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); @@ -1460,19 +1458,22 @@ TYPED_TEST(HorizontalDiv3DTest, TestRandom) { for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); + this->i_startblk, this->i_endblk, i_startidx, i_endidx); for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, 
jb, 0)])] * this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * this->geofac_div[geofac_div_at(jc, 2, jb)]; } } @@ -1508,7 +1509,8 @@ TYPED_TEST(HorizontalDiv3D2FTest, TestSpecific) { for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern - this->f4din[f4d_at(i, k, 0, 0)] = (i + 1) * (k + 2); // Different pattern for second field + this->f4din[f4d_at(i, k, 0, 0)] = + (i + 1) * (k + 2); // Different pattern for second field } // Set edge indices to point to specific cells (including self) @@ -1534,14 +1536,13 @@ TYPED_TEST(HorizontalDiv3D2FTest, TestSpecific) { } // Call the div3d_2field function - div3d_2field<TypeParam>( - this->vec_e.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->div_vec_c.data(), this->f4din.data(), this->f4dout.data(), - this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); + div3d_2field<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->f4din.data(), + this->f4dout.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); // Check first 
field (same as in div3d test) EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6); @@ -1590,7 +1591,8 @@ TYPED_TEST(HorizontalDiv3D2FTest, TestRandom) { // Set random edge indices for (int j = 0; j < 3; ++j) { this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + this->cell_edge_blk[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity } // Random geometric factors @@ -1606,14 +1608,13 @@ TYPED_TEST(HorizontalDiv3D2FTest, TestRandom) { } // Call the div3d_2field function - div3d_2field<TypeParam>( - this->vec_e.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->div_vec_c.data(), this->f4din.data(), this->f4dout.data(), - this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); + div3d_2field<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->f4din.data(), + this->f4dout.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); // Calculate reference values separately and verify results std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); @@ -1622,32 +1623,38 @@ TYPED_TEST(HorizontalDiv3D2FTest, TestRandom) { for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); + this->i_startblk, this->i_endblk, i_startidx, i_endidx); for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { // Calculate reference value for first field 
ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * this->geofac_div[geofac_div_at(jc, 2, jb)]; // Calculate reference value for second field ref_f4dout[f4dout_at(jc, jk, jb, 0)] = this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)], 0)] * + this->cell_edge_blk[cell_edge_at(jc, jb, 0)], + 0)] * this->geofac_div[geofac_div_at(jc, 0, jb)] + this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)], 0)] * + this->cell_edge_blk[cell_edge_at(jc, jb, 1)], + 0)] * this->geofac_div[geofac_div_at(jc, 1, jb)] + this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)], 0)] * + this->cell_edge_blk[cell_edge_at(jc, jb, 2)], + 0)] * this->geofac_div[geofac_div_at(jc, 2, jb)]; } } @@ -1703,12 +1710,12 @@ TYPED_TEST(HorizontalDiv4DTest, TestSpecific) { } // Test function - div4d<TypeParam>( - this->cell_edge_idx.data(), this->cell_edge_blk.data(), - this->geofac_div.data(), this->f4din.data(), this->f4dout.data(), - this->dim4d, this->i_startblk, this->i_endblk, this->i_startidx_in, - 
this->i_endidx_in, this->slev.data(), this->elev.data(), this->nproma, - this->lacc, this->nlev, this->nblks_c, this->nblks_e); + div4d<TypeParam>(this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->geofac_div.data(), this->f4din.data(), + this->f4dout.data(), this->dim4d, this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev.data(), this->elev.data(), this->nproma, + this->lacc, this->nlev, this->nblks_c, this->nblks_e); EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 0)], 1.4, 1e-6); EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 0)], 1.1, 1e-6); @@ -1722,7 +1729,6 @@ TYPED_TEST(HorizontalDiv4DTest, TestSpecific) { EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 1)], 2.6, 1e-6); EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 1)], 2.3, 1e-6); EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 1)], 2.3, 1e-6); - } TYPED_TEST(HorizontalDiv4DTest, TestDiv4dRandom) { @@ -1759,29 +1765,29 @@ TYPED_TEST(HorizontalDiv4DTest, TestDiv4dRandom) { } // Test function - div4d<TypeParam>( - this->cell_edge_idx.data(), this->cell_edge_blk.data(), - this->geofac_div.data(), this->f4din.data(), this->f4dout.data(), - this->dim4d, this->i_startblk, this->i_endblk, this->i_startidx_in, - this->i_endidx_in, this->slev.data(), this->elev.data(), this->nproma, - this->lacc, this->nlev, this->nblks_c, this->nblks_e); + div4d<TypeParam>(this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->geofac_div.data(), this->f4din.data(), + this->f4dout.data(), this->dim4d, this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev.data(), this->elev.data(), this->nproma, + this->lacc, this->nlev, this->nblks_c, this->nblks_e); // Compute reference result and check for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); + this->i_startblk, this->i_endblk, i_startidx, 
i_endidx); for (int ji = 0; ji < dim4d; ++ji) { for (int jk = this->slev[ji]; jk < this->elev[ji]; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { TypeParam expected = 0.0; for (int je = 0; je < 3; ++je) { - expected += this->f4din[f4din_at( - this->cell_edge_idx[cell_edge_at(jc, jb, je)], - jk, - this->cell_edge_blk[cell_edge_at(jc, jb, je)], - ji)] * this->geofac_div[geofac_div_at(jc, je, jb)]; + expected += + this->f4din[f4din_at( + this->cell_edge_idx[cell_edge_at(jc, jb, je)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, je)], ji)] * + this->geofac_div[geofac_div_at(jc, je, jb)]; } EXPECT_NEAR(this->f4dout[f4dout_at(jc, jk, jb, ji)], expected, 1e-5) @@ -1826,7 +1832,8 @@ TYPED_TEST(HorizontalDivAvgTest, TestSpecific) { for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern - this->opt_in2[vec_e_at(i, k, 0)] = (i + 1) * (k + 1) * 0.5; // Half of vec_e + this->opt_in2[vec_e_at(i, k, 0)] = + (i + 1) * (k + 1) * 0.5; // Half of vec_e } // Set edge indices to point to specific cells @@ -1867,12 +1874,11 @@ TYPED_TEST(HorizontalDivAvgTest, TestSpecific) { div_avg<TypeParam>( this->vec_e.data(), this->cell_neighbor_idx.data(), this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), this->avg_coeff.data(), - this->div_vec_c.data(), this->opt_in2.data(), this->opt_out2.data(), - i_startblk_in.data(), i_endblk_in.data(), - i_startidx_in.data(), i_endidx_in.data(), this->slev[0], - this->elev[0], this->nproma, patch_id, l_limited_area, - l2fields, this->lacc, this->nlev, + this->cell_edge_blk.data(), this->geofac_div.data(), + this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), + this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], + this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, 
this->nblks_c, this->nblks_e); EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6); @@ -1888,7 +1894,6 @@ TYPED_TEST(HorizontalDivAvgTest, TestSpecific) { EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 2.04, 1e-6); EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 1.04, 1e-6); EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 2.08, 1e-6); - } TYPED_TEST(HorizontalDivAvgTest, TestRandom) { @@ -1932,141 +1937,158 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandom) { // Set random edge indices for (int j = 0; j < 3; ++j) { this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + this->cell_edge_blk[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; // Keep in same block for simplicity - } - - // Random geometric factors - for (int j = 0; j < 3; ++j) { - this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); - } - - // Random average coefficients - for (int j = 0; j < 4; ++j) { - this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen); - } - - // Random initial values for div_vec_c and opt_out2 - for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); - this->opt_out2[div_vec_c_at(i, k, 0)] = real_distrib(gen); - } - } - - // Call the div_avg function - div_avg<TypeParam>( - this->vec_e.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), this->avg_coeff.data(), - this->div_vec_c.data(), this->opt_in2.data(), this->opt_out2.data(), - i_startblk_in.data(), i_endblk_in.data(), - i_startidx_in.data(), i_endidx_in.data(), this->slev[0], - this->elev[0], this->nproma, patch_id, l_limited_area, - l2fields, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); - - // 
Calculate reference values manually - std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c)); - std::vector<TypeParam> aux_c2(dim_combine(nproma, nlev, nblks_c)); - std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, nblks_c)); - std::vector<TypeParam> ref_opt_out2(dim_combine(nproma, nlev, nblks_c)); - - // Step 1: Calculate aux_c and aux_c2 - for (int jb = i_startblk_in[0]; jb < i_endblk_in[0]; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, - i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - aux_c[div_vec_c_at(jc, jk, jb)] = - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; - - aux_c2[div_vec_c_at(jc, jk, jb)] = - this->opt_in2[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->opt_in2[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->opt_in2[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; - } - } - } - - // Step 2: Assign aux_c to div_vec_c and aux_c2 to opt_out2 for patch_id > 0 - for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { - int i_startidx, i_endidx; - 
get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, - i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = aux_c[div_vec_c_at(jc, jk, jb)]; - ref_opt_out2[div_vec_c_at(jc, jk, jb)] = aux_c2[div_vec_c_at(jc, jk, jb)]; - } - } - } - - // Step 3: Perform averaging for the rest of the blocks - for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, - i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - aux_c[div_vec_c_at(jc, jk, jb)] * this->avg_coeff[avg_coeff_at(jc, 0, jb)] + - aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * - this->avg_coeff[avg_coeff_at(jc, 1, jb)] + - aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * - this->avg_coeff[avg_coeff_at(jc, 2, jb)] + - aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * - this->avg_coeff[avg_coeff_at(jc, 3, jb)]; - - ref_opt_out2[div_vec_c_at(jc, jk, jb)] = - aux_c2[div_vec_c_at(jc, jk, jb)] * this->avg_coeff[avg_coeff_at(jc, 0, jb)] + - aux_c2[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * - this->avg_coeff[avg_coeff_at(jc, 1, jb)] + - aux_c2[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * - this->avg_coeff[avg_coeff_at(jc, 2, jb)] + - 
aux_c2[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * - this->avg_coeff[avg_coeff_at(jc, 3, jb)]; - } - } - } - - // Verify results - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], - ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) - << "div_vec_c results differ at i=" << i << ", k=" << k; - - EXPECT_NEAR(this->opt_out2[div_vec_c_at(i, k, 0)], - ref_opt_out2[div_vec_c_at(i, k, 0)], 1e-5) - << "opt_out2 results differ at i=" << i << ", k=" << k; - } - } + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = + 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + // Random average coefficients + for (int j = 0; j < 4; ++j) { + this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen); + } + + // Random initial values for div_vec_c and opt_out2 + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + this->opt_out2[div_vec_c_at(i, k, 0)] = real_distrib(gen); + } + } + + // Call the div_avg function + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), + this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], + this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values manually + std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> aux_c2(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, 
nblks_c)); + std::vector<TypeParam> ref_opt_out2(dim_combine(nproma, nlev, nblks_c)); + + // Step 1: Calculate aux_c and aux_c2 + for (int jb = i_startblk_in[0]; jb < i_endblk_in[0]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + aux_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + + aux_c2[div_vec_c_at(jc, jk, jb)] = + this->opt_in2[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->opt_in2[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->opt_in2[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Step 2: Assign aux_c to div_vec_c and aux_c2 to opt_out2 for patch_id > 0 + for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { 
+ ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + aux_c[div_vec_c_at(jc, jk, jb)]; + ref_opt_out2[div_vec_c_at(jc, jk, jb)] = + aux_c2[div_vec_c_at(jc, jk, jb)]; + } + } + } + + // Step 3: Perform averaging for the rest of the blocks + for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, + i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + aux_c[div_vec_c_at(jc, jk, jb)] * + this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * + this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * + this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * + this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + + ref_opt_out2[div_vec_c_at(jc, jk, jb)] = + aux_c2[div_vec_c_at(jc, jk, jb)] * + this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + aux_c2[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * + this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + aux_c2[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * + this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + aux_c2[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * + this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + 
for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "div_vec_c results differ at i=" << i << ", k=" << k; + + EXPECT_NEAR(this->opt_out2[div_vec_c_at(i, k, 0)], + ref_opt_out2[div_vec_c_at(i, k, 0)], 1e-5) + << "opt_out2 results differ at i=" << i << ", k=" << k; + } + } } TYPED_TEST(HorizontalDivAvgTest, TestSpecificNoL2fields) { @@ -2099,7 +2121,8 @@ TYPED_TEST(HorizontalDivAvgTest, TestSpecificNoL2fields) { for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern - this->opt_in2[vec_e_at(i, k, 0)] = (i + 1) * (k + 1) * 0.5; // Half of vec_e + this->opt_in2[vec_e_at(i, k, 0)] = + (i + 1) * (k + 1) * 0.5; // Half of vec_e } // Set edge indices to point to specific cells @@ -2140,15 +2163,13 @@ TYPED_TEST(HorizontalDivAvgTest, TestSpecificNoL2fields) { div_avg<TypeParam>( this->vec_e.data(), this->cell_neighbor_idx.data(), this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), this->avg_coeff.data(), - this->div_vec_c.data(), this->opt_in2.data(), this->opt_out2.data(), - i_startblk_in.data(), i_endblk_in.data(), - i_startidx_in.data(), i_endidx_in.data(), this->slev[0], - this->elev[0], this->nproma, patch_id, l_limited_area, - l2fields, this->lacc, this->nlev, + this->cell_edge_blk.data(), this->geofac_div.data(), + this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), + this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], + this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, this->nblks_c, this->nblks_e); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6); EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.76, 1e-6); EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.04, 1e-6); @@ -2162,7 +2183,6 
@@ TYPED_TEST(HorizontalDivAvgTest, TestSpecificNoL2fields) { EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 0.0, 1e-6); EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 0.0, 1e-6); EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 0.0, 1e-6); - } TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { @@ -2185,7 +2205,7 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { // Parameters for the test int patch_id = 1; bool l_limited_area = true; - bool l2fields = false; // Set to false for this test + bool l2fields = false; // Set to false for this test const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; const auto &avg_coeff_at = at<nproma, 4, nblks_c>; @@ -2200,16 +2220,19 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); - this->opt_in2[vec_e_at(i, k, 0)] = real_distrib(gen); // Not used but initialize anyway + this->opt_in2[vec_e_at(i, k, 0)] = + real_distrib(gen); // Not used but initialize anyway } // Set random edge indices for (int j = 0; j < 3; ++j) { this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + this->cell_edge_blk[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; // Keep in same block for simplicity + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = + 0; // Keep in same block for simplicity } // Random geometric factors @@ -2225,7 +2248,8 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { // Random initial values for div_vec_c and opt_out2 for (int k = 0; k < nlev; ++k) { this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); - this->opt_out2[div_vec_c_at(i, k, 0)] = real_distrib(gen); // Not used but initialize anyway + 
this->opt_out2[div_vec_c_at(i, k, 0)] = + real_distrib(gen); // Not used but initialize anyway } } @@ -2233,12 +2257,11 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { div_avg<TypeParam>( this->vec_e.data(), this->cell_neighbor_idx.data(), this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), this->avg_coeff.data(), - this->div_vec_c.data(), this->opt_in2.data(), this->opt_out2.data(), - i_startblk_in.data(), i_endblk_in.data(), - i_startidx_in.data(), i_endidx_in.data(), this->slev[0], - this->elev[0], this->nproma, patch_id, l_limited_area, - l2fields, this->lacc, this->nlev, + this->cell_edge_blk.data(), this->geofac_div.data(), + this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), + this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], + this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, this->nblks_c, this->nblks_e); // Calculate reference values manually @@ -2254,20 +2277,24 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { aux_c[div_vec_c_at(jc, jk, jb)] = - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, 
jb, 2)])] * + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * this->geofac_div[geofac_div_at(jc, 2, jb)]; } } } - // Step 2: Assign aux_c to div_vec_c for patch_id > 0 (opt_out2 not updated since l2fields=false) + // Step 2: Assign aux_c to div_vec_c for patch_id > 0 (opt_out2 not updated + // since l2fields=false) for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, @@ -2275,12 +2302,14 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { - ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = aux_c[div_vec_c_at(jc, jk, jb)]; + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + aux_c[div_vec_c_at(jc, jk, jb)]; } } } - // Step 3: Perform averaging for the rest of the blocks (only for div_vec_c, not opt_out2) + // Step 3: Perform averaging for the rest of the blocks (only for div_vec_c, + // not opt_out2) for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, @@ -2289,21 +2318,26 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - aux_c[div_vec_c_at(jc, jk, jb)] * this->avg_coeff[avg_coeff_at(jc, 0, jb)] + - aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * + aux_c[div_vec_c_at(jc, jk, jb)] * + this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * this->avg_coeff[avg_coeff_at(jc, 1, jb)] + - 
aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * this->avg_coeff[avg_coeff_at(jc, 2, jb)] + - aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * this->avg_coeff[avg_coeff_at(jc, 3, jb)]; } } } - // Verify results - only check div_vec_c since l2fields=false means opt_out2 isn't updated + // Verify results - only check div_vec_c since l2fields=false means opt_out2 + // isn't updated for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], @@ -2316,20 +2350,20 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { template <typename ValueType> class HorizontalRotVertexTest : public ::testing::Test { protected: - static constexpr int nproma = 3; // inner loop length - static constexpr int nlev = 2; // number of vertical levels - static constexpr int nblks_e = 1; // number of edge blocks - static constexpr int nblks_v = 1; // number of vertex blocks - static constexpr int dim4d = 2; // 4th dimension size + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 2; // number of vertical levels + static constexpr int nblks_e = 1; // number of edge blocks + static constexpr int nblks_v = 1; // number of vertex blocks + static constexpr int dim4d = 2; // 4th dimension size int i_startblk = 0; - int i_endblk = nblks_v; // Test blocks [0 .. nblks_v-1] + int i_endblk = nblks_v; // Test blocks [0 .. nblks_v-1] int i_startidx_in = 0; - int i_endidx_in = nproma; // Full range: 0 .. nproma-1 + int i_endidx_in = nproma; // Full range: 0 .. 
nproma-1 std::vector<int> slev; std::vector<int> elev; - bool lacc = false; // Not using ACC-specific behavior. - bool acc_async = false; // Not using ACC-specific behavior. + bool lacc = false; // Not using ACC-specific behavior. + bool acc_async = false; // Not using ACC-specific behavior. std::vector<ValueType> vec_e; std::vector<int> vert_edge_idx; @@ -2339,9 +2373,9 @@ protected: std::vector<ValueType> f4din; std::vector<ValueType> f4dout; - HorizontalRotVertexTest () { + HorizontalRotVertexTest() { slev.resize(dim4d, 0); - elev.resize(dim4d, nlev); // Full vertical range (0 .. nlev-1) + elev.resize(dim4d, nlev); // Full vertical range (0 .. nlev-1) vec_e.resize(dim_combine(nproma, nlev, nblks_e)); vert_edge_idx.resize(dim_combine(nproma, nblks_v, 6)); @@ -2354,12 +2388,11 @@ protected: }; template <typename ValueType> -class HorizontalRotVertexAtmosTest - : public HorizontalRotVertexTest<ValueType> {}; +class HorizontalRotVertexAtmosTest : public HorizontalRotVertexTest<ValueType> { +}; template <typename ValueType> -class HorizontalRotVertexRITest - : public HorizontalRotVertexTest<ValueType> {}; +class HorizontalRotVertexRITest : public HorizontalRotVertexTest<ValueType> {}; TYPED_TEST_SUITE(HorizontalRotVertexAtmosTest, ValueTypes); @@ -2404,10 +2437,9 @@ TYPED_TEST(HorizontalRotVertexAtmosTest, TestSpecific) { // Call the rot_vertex_atmos function rot_vertex_atmos<TypeParam>( this->vec_e.data(), this->vert_edge_idx.data(), - this->vert_edge_blk.data(), this->geofac_rot.data(), - this->rot_vec.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, + this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev[0], this->elev[0], this->nproma, this->lacc, this->nlev, this->nblks_e, this->nblks_v); // Expected values based on the initialization pattern @@ 
-2445,7 +2477,8 @@ TYPED_TEST(HorizontalRotVertexAtmosTest, TestRandom) { // Set random edge indices for (int j = 0; j < 6; ++j) { this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen); - this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + this->vert_edge_blk[vert_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity } // Random geometric factors @@ -2462,10 +2495,9 @@ TYPED_TEST(HorizontalRotVertexAtmosTest, TestRandom) { // Call the rot_vertex_atmos function rot_vertex_atmos<TypeParam>( this->vec_e.data(), this->vert_edge_idx.data(), - this->vert_edge_blk.data(), this->geofac_rot.data(), - this->rot_vec.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, + this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev[0], this->elev[0], this->nproma, this->lacc, this->nlev, this->nblks_e, this->nblks_v); // Calculate reference values separately and verify results @@ -2479,23 +2511,29 @@ TYPED_TEST(HorizontalRotVertexAtmosTest, TestRandom) { for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jv = i_startidx; jv < i_endidx; ++jv) { ref_rot_vec[rot_vec_at(jv, jk, jb)] = - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * this->geofac_rot[geofac_rot_at(jv, 0, jb)] + - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * this->geofac_rot[geofac_rot_at(jv, 1, jb)] + - 
this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * this->geofac_rot[geofac_rot_at(jv, 2, jb)] + - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * this->geofac_rot[geofac_rot_at(jv, 3, jb)] + - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * this->geofac_rot[geofac_rot_at(jv, 4, jb)] + - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * this->geofac_rot[geofac_rot_at(jv, 5, jb)]; } } @@ -2554,10 +2592,9 @@ TYPED_TEST(HorizontalRotVertexRITest, TestSpecific) { // Call the rot_vertex_ri function rot_vertex_ri<TypeParam>( this->vec_e.data(), this->vert_edge_idx.data(), - this->vert_edge_blk.data(), this->geofac_rot.data(), - this->rot_vec.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->acc_async, + this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev[0], this->elev[0], this->nproma, this->lacc, this->acc_async, this->nlev, this->nblks_e, this->nblks_v); // Expected values based on the initialization pattern @@ -2595,7 +2632,8 @@ TYPED_TEST(HorizontalRotVertexRITest, TestRandom) 
{ // Set random edge indices for (int j = 0; j < 6; ++j) { this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen); - this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + this->vert_edge_blk[vert_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity } // Random geometric factors @@ -2612,10 +2650,9 @@ TYPED_TEST(HorizontalRotVertexRITest, TestRandom) { // Call the rot_vertex_ri function rot_vertex_ri<TypeParam>( this->vec_e.data(), this->vert_edge_idx.data(), - this->vert_edge_blk.data(), this->geofac_rot.data(), - this->rot_vec.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->acc_async, + this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev[0], this->elev[0], this->nproma, this->lacc, this->acc_async, this->nlev, this->nblks_e, this->nblks_v); // Ensure computation is complete for both modes @@ -2632,23 +2669,29 @@ TYPED_TEST(HorizontalRotVertexRITest, TestRandom) { for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jv = i_startidx; jv < i_endidx; ++jv) { ref_rot_vec[rot_vec_at(jv, jk, jb)] = - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * this->geofac_rot[geofac_rot_at(jv, 0, jb)] + - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * this->geofac_rot[geofac_rot_at(jv, 1, jb)] + - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * + 
this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * this->geofac_rot[geofac_rot_at(jv, 2, jb)] + - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * this->geofac_rot[geofac_rot_at(jv, 3, jb)] + - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * this->geofac_rot[geofac_rot_at(jv, 4, jb)] + - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * this->geofac_rot[geofac_rot_at(jv, 5, jb)]; } } @@ -2662,5 +2705,4 @@ TYPED_TEST(HorizontalRotVertexRITest, TestRandom) { << "Results differ at i=" << i << ", k=" << k << ")"; } } - } -- GitLab From 4ab914e2206e80f27add733b4555def6f9d2290d Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 17 Mar 2025 11:22:04 +0100 Subject: [PATCH 70/76] reverted back some changes --- test/c/test_horizontal_divrot.cpp | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index a97c1da..10725a5 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -832,15 +832,11 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { std::vector<TypeParam> z_d(lsq_dim_c); std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); - // for (int jb = this->i_startblk; jb < 
this->i_endblk; ++jb) { - { - int jb = 0; + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, this->i_endblk, i_startidx, i_endidx); - // for (int jk = this->slev; jk < this->elev; ++jk) { - { - int jk = 0; + for (int jk = this->slev; jk < this->elev; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { for (int i = 0; i < lsq_dim_c; ++i) { z_d[i] = this->p_cc[p_cc_at( @@ -1242,15 +1238,11 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); - // for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - { - int jb = 0; + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, this->i_endblk, i_startidx, i_endidx); - // for (int jk = this->slev; jk < this->elev; ++jk) { - { - int jk = 0; + for (int jk = this->slev; jk < this->elev; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { for (int i = 0; i < lsq_dim_c; ++i) { z_d[i] = this->p_cc[p_cc_at( -- GitLab From 74bb7a218881f59dfe1d171989548f7c4dda352c Mon Sep 17 00:00:00 2001 From: Yen-Chen <yen-chen.chen@tum.de> Date: Mon, 17 Mar 2025 13:46:00 +0100 Subject: [PATCH 71/76] Remove redundant code --- test/c/test_horizontal_divrot.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 10725a5..0a0aba3 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -386,10 +386,6 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVD) { this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; } - // this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = 2.0; - // this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = 2.0; - // this->lsq_rmat_utri_c[rmat_utri_at(i, 
0, 0)] = 0.1; - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; this->lsq_moments[moments_at(i, 0, 1)] = 0.3; } -- GitLab From fbb724df44df45c7a65e57960de358ce0ad47114 Mon Sep 17 00:00:00 2001 From: Yen-Chen <yen-chen.chen@tum.de> Date: Mon, 17 Mar 2025 13:48:34 +0100 Subject: [PATCH 72/76] Rename lib_divrot to mo_lib_divrot --- src/horizontal/lib_divrot.cpp | 1359 ----------------------------- src/horizontal/lib_divrot.hpp | 130 --- test/c/test_horizontal_divrot.cpp | 2 +- 3 files changed, 1 insertion(+), 1490 deletions(-) delete mode 100644 src/horizontal/lib_divrot.cpp delete mode 100644 src/horizontal/lib_divrot.hpp diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp deleted file mode 100644 index a24981d..0000000 --- a/src/horizontal/lib_divrot.cpp +++ /dev/null @@ -1,1359 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -#include <iostream> -#include <vector> - -#include <horizontal/lib_divrot.hpp> -#include <support/mo_lib_loopindices.hpp> - -template <typename T> -void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, const T *lsq_qtmat_c, - const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, T *p_coeff, int i_startblk, - int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, bool l_consv, bool lacc, - bool acc_async, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c) { - // Wrap raw pointers in unmanaged Kokkos Views. 
- typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedConstT3D; - typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, - Kokkos::MemoryUnmanaged> - UnmanagedConstT4D; - typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT4D; - typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, - Kokkos::MemoryUnmanaged> - UnmanagedConstInt3D; - - Kokkos::View<T *> z_d("z_d", lsq_dim_c); - Kokkos::View<T *> z_qt_times_d("z_qt_times_d", lsq_dim_unk); - - UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, lsq_dim_c); - UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, lsq_dim_c); - - UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); - UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); - - UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, - lsq_dim_c, nblks_c); - UnmanagedConstT3D lsq_rmat_rdiag_c_view(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, - nblks_c); - UnmanagedConstT3D lsq_rmat_utri_c_view( - lsq_rmat_utri_c, nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, - nblks_c); - UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); - - for (int jb = i_startblk; jb < i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, - i_endblk, i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); - Kokkos::parallel_for( - "recon_lsq_cell_l_inner", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - z_d(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - - p_cc_view(jc, jk, jb); - z_d(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - - p_cc_view(jc, jk, jb); - z_d(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - - p_cc_view(jc, jk, jb); - // matrix multiplication Q^T d (partitioned into 2 dot products) - z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0) + - 
lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1) + - lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2); - z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0) + - lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1) + - lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2); - - p_coeff_view(2, jc, jk, jb) = - lsq_rmat_rdiag_c_view(jc, 1, jb) * z_qt_times_d(1); - p_coeff_view(1, jc, jk, jb) = - lsq_rmat_rdiag_c_view(jc, 0, jb) * - (z_qt_times_d(0) - - lsq_rmat_utri_c_view(jc, 0, jb) * p_coeff_view(2, jc, jk, jb)); - p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb); - }); - if (l_consv) { - Kokkos::parallel_for( - "recon_lsq_cell_l_consv", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - p_coeff_view(0, jc, jk, jb) = - p_coeff_view(0, jc, jk, jb) - - p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - - p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1); - }); - } - } - - if (!acc_async) - Kokkos::fence(); -} - -ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_L); - -template <typename T> -void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, const T *lsq_pseudoinv, - const T *lsq_moments, T *p_coeff, int i_startblk, - int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, bool l_consv, - bool lacc, bool acc_async, int nblks_c, int nlev, - int lsq_dim_unk, int lsq_dim_c) { - // Wrap raw pointers in unmanaged Kokkos Views. 
- typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedConstT3D; - typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, - Kokkos::MemoryUnmanaged> - UnmanagedConstT4D; - typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT4D; - typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, - Kokkos::MemoryUnmanaged> - UnmanagedConstInt3D; - - Kokkos::View<T *> z_b("z_b", lsq_dim_c); - - UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, lsq_dim_c); - UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, lsq_dim_c); - - UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); - UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); - - UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, - lsq_dim_c, nblks_c); - UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); - - for (int jb = i_startblk; jb < i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, - i_endblk, i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); - Kokkos::parallel_for( - "recon_lsq_cell_l_svd_inner", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - z_b(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - - p_cc_view(jc, jk, jb); - z_b(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - - p_cc_view(jc, jk, jb); - z_b(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - - p_cc_view(jc, jk, jb); - - p_coeff_view(2, jc, jk, jb) = - lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2); - p_coeff_view(1, jc, jk, jb) = - lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2); - p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb); - }); - if (l_consv) { 
- Kokkos::parallel_for( - "recon_lsq_cell_l_svd_consv", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - p_coeff_view(0, jc, jk, jb) = - p_coeff_view(0, jc, jk, jb) - - p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - - p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1); - }); - } - } - - if (!acc_async) - Kokkos::fence(); -} - -ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD); - -template <typename T> -void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const T *lsq_qtmat_c, const T *lsq_rmat_rdiag_c, - const T *lsq_rmat_utri_c, const T *lsq_moments, - T *p_coeff, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, - int nproma, int patch_id, bool l_limited_area, bool lacc, - int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) { - // Wrap raw pointers in unmanaged Kokkos Views. - typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedConstT3D; - typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, - Kokkos::MemoryUnmanaged> - UnmanagedConstT4D; - typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT4D; - typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, - Kokkos::MemoryUnmanaged> - UnmanagedConstInt3D; - - Kokkos::View<T ***> z_d("z_d", lsq_dim_c, nproma, nlev); - Kokkos::View<T *> z_qt_times_d("z_qt_times_d", lsq_dim_unk); - - UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); - UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); - - UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); - UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); - - UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, - lsq_dim_c, nblks_c); - UnmanagedConstT3D ptr_rrdiag(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, nblks_c); - UnmanagedConstT3D ptr_rutri(lsq_rmat_utri_c, nproma, - (lsq_dim_unk * lsq_dim_unk - 
lsq_dim_unk) / 2, - nblks_c); - UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); - - if (patch_id > 0 || l_limited_area) { - Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( - {0, i_startidx_in, slev, i_startblk}, - {lsq_dim_unk + 1, i_endidx_in, elev, i_endblk}); - Kokkos::parallel_for( - "recon_lsq_cell_q_init", initPolicy, - KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) { - p_coeff_view(ji, jc, jk, jb) = 0; - }); - } - - for (int jb = i_startblk; jb < i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, - i_endblk, i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); - Kokkos::parallel_for( - "recon_lsq_cell_q_step1", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - z_d(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - - p_cc_view(jc, jk, jb); - z_d(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - - p_cc_view(jc, jk, jb); - z_d(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - - p_cc_view(jc, jk, jb); - z_d(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - - p_cc_view(jc, jk, jb); - z_d(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - - p_cc_view(jc, jk, jb); - z_d(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - - p_cc_view(jc, jk, jb); - z_d(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - - p_cc_view(jc, jk, jb); - z_d(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - - p_cc_view(jc, jk, jb); - z_d(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - - p_cc_view(jc, jk, jb); - }); - Kokkos::parallel_for( - "recon_lsq_cell_q_step2", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 0, 2, jb) * 
z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, 
jc, jk) + - lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk); - - p_coeff_view(5, jc, jk, jb) = ptr_rrdiag(jc, 4, jb) * z_qt_times_d(4); - p_coeff_view(4, jc, jk, jb) = - ptr_rrdiag(jc, 3, jb) * - (z_qt_times_d(3) - - ptr_rutri(jc, 0, jb) * p_coeff_view(5, jc, jk, jb)); - p_coeff_view(3, jc, jk, jb) = - ptr_rrdiag(jc, 2, jb) * - (z_qt_times_d(2) - - ptr_rutri(jc, 1, jb) * p_coeff_view(4, jc, jk, jb) - - ptr_rutri(jc, 2, jb) * p_coeff_view(5, jc, jk, jb)); - p_coeff_view(2, jc, jk, jb) = - ptr_rrdiag(jc, 1, jb) * - (z_qt_times_d(1) - - ptr_rutri(jc, 3, jb) * p_coeff_view(3, jc, jk, jb) - - ptr_rutri(jc, 4, jb) * p_coeff_view(4, jc, jk, jb) - - ptr_rutri(jc, 5, jb) * p_coeff_view(5, jc, jk, jb)); - p_coeff_view(1, jc, jk, jb) = - ptr_rrdiag(jc, 0, jb) * - (z_qt_times_d(0) - - ptr_rutri(jc, 6, jb) * p_coeff_view(2, jc, jk, jb) - - ptr_rutri(jc, 7, jb) * p_coeff_view(3, jc, jk, jb) - - ptr_rutri(jc, 8, jb) * p_coeff_view(4, jc, jk, jb) - - ptr_rutri(jc, 9, jb) * p_coeff_view(5, jc, jk, jb)); - p_coeff_view(0, jc, jk, jb) = - p_cc_view(jc, jk, jb) - - p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - - p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - - p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) - - p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) - - p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4); - }); - } - - Kokkos::fence(); -} - -ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_Q); - -template <typename T> -void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, - const int *lsq_blk_c, const T *lsq_pseudoinv, - const T *lsq_moments, T *p_coeff, int i_startblk, - int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int patch_id, - bool 
l_limited_area, bool lacc, int nblks_c, int nlev, - int lsq_dim_unk, int lsq_dim_c) { - // Wrap raw pointers in unmanaged Kokkos Views. - typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedConstT3D; - typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, - Kokkos::MemoryUnmanaged> - UnmanagedConstT4D; - typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT4D; - typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, - Kokkos::MemoryUnmanaged> - UnmanagedConstInt3D; - - Kokkos::View<T ***> z_b("z_b", lsq_dim_c, nproma, elev); - - UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); - UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); - - UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); - UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); - - UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, - lsq_dim_c, nblks_c); - UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); - - if (patch_id > 0 || l_limited_area) { - Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( - {0, i_startidx_in, slev, i_startblk}, - {lsq_dim_unk + 1, i_endidx_in, elev, i_endblk}); - Kokkos::parallel_for( - "recon_lsq_cell_q_svd_init", initPolicy, - KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) { - p_coeff_view(ji, jc, jk, jb) = 0; - }); - } - - for (int jb = i_startblk; jb < i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, - i_endblk, i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); - Kokkos::parallel_for( - "recon_lsq_cell_q_svd_step1", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - z_b(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - - p_cc_view(jc, jk, jb); - z_b(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - - 
p_cc_view(jc, jk, jb); - z_b(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - - p_cc_view(jc, jk, jb); - z_b(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - - p_cc_view(jc, jk, jb); - z_b(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - - p_cc_view(jc, jk, jb); - z_b(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - - p_cc_view(jc, jk, jb); - z_b(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - - p_cc_view(jc, jk, jb); - z_b(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - - p_cc_view(jc, jk, jb); - z_b(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - - p_cc_view(jc, jk, jb); - }); - Kokkos::parallel_for( - "recon_lsq_cell_q_svd_step2", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - p_coeff_view(5, jc, jk, jb) = - lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8, jc, jk); - p_coeff_view(4, jc, jk, jb) = - lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 3, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8, jc, jk); - p_coeff_view(3, jc, jk, jb) = - lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1, jc, jk) + - 
lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 2, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8, jc, jk); - p_coeff_view(2, jc, jk, jb) = - lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 1, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 1, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8, jc, jk); - p_coeff_view(1, jc, jk, jb) = - lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8, jc, jk); - p_coeff_view(0, jc, jk, jb) = - p_cc_view(jc, jk, jb) - - p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - - p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - - p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) - - p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) - - p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4); - }); - } - - Kokkos::fence(); -} - -ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD); - -template <typename T> -void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int 
*lsq_blk_c, - const T *lsq_qtmat_c, const T *lsq_rmat_rdiag_c, - const T *lsq_rmat_utri_c, const T *lsq_moments, - T *p_coeff, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, - int nproma, int patch_id, bool l_limited_area, bool lacc, - int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) { - // Wrap raw pointers in unmanaged Kokkos Views. - typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedConstT3D; - typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, - Kokkos::MemoryUnmanaged> - UnmanagedConstT4D; - typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT4D; - typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, - Kokkos::MemoryUnmanaged> - UnmanagedConstInt3D; - - Kokkos::View<T ***> z_d("z_d", lsq_dim_c, nproma, elev); - Kokkos::View<T *> z_qt_times_d("z_qt_times_d", 9); - - UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); - UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); - - UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); - UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); - - UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, - lsq_dim_c, nblks_c); - UnmanagedConstT3D ptr_rrdiag(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, nblks_c); - UnmanagedConstT3D ptr_rutri(lsq_rmat_utri_c, nproma, - (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, - nblks_c); - UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); - - if (patch_id > 0 || l_limited_area) { - Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( - {0, i_startidx_in, slev, i_startblk}, - {lsq_dim_unk + 1, i_endidx_in, elev, i_endblk}); - Kokkos::parallel_for( - "recon_lsq_cell_c_init", initPolicy, - KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) { - p_coeff_view(ji, jc, jk, jb) = 0; - }); - } - - for (int jb = i_startblk; jb < i_endblk; ++jb) { - int i_startidx, 
i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, - i_endblk, i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); - Kokkos::parallel_for( - "recon_lsq_cell_c_step1", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - z_d(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - - p_cc_view(jc, jk, jb); - z_d(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - - p_cc_view(jc, jk, jb); - z_d(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - - p_cc_view(jc, jk, jb); - z_d(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - - p_cc_view(jc, jk, jb); - z_d(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - - p_cc_view(jc, jk, jb); - z_d(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - - p_cc_view(jc, jk, jb); - z_d(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - - p_cc_view(jc, jk, jb); - z_d(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - - p_cc_view(jc, jk, jb); - z_d(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - - p_cc_view(jc, jk, jb); - }); - Kokkos::parallel_for( - "recon_lsq_cell_c_step2", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) + - 
lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(5) = lsq_qtmat_c_view(jc, 5, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 5, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 5, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 5, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 5, 4, jb) * z_d(4, jc, jk) + - 
lsq_qtmat_c_view(jc, 5, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 5, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 5, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 5, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(6) = lsq_qtmat_c_view(jc, 6, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 6, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 6, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 6, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 6, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 6, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 6, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 6, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 6, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(7) = lsq_qtmat_c_view(jc, 7, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 7, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 7, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 7, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 7, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 7, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 7, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 7, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 7, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(8) = lsq_qtmat_c_view(jc, 8, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 8, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 8, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 8, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 8, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 8, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 8, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 8, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 8, 8, jb) * z_d(8, jc, jk); - - p_coeff_view(9, jc, jk, jb) = ptr_rrdiag(jc, 8, jb) * z_qt_times_d(8); - p_coeff_view(8, jc, jk, jb) = - ptr_rrdiag(jc, 7, jb) * - (z_qt_times_d(7) - - ptr_rutri(jc, 0, jb) * p_coeff_view(9, jc, jk, jb)); - p_coeff_view(7, jc, jk, jb) = - ptr_rrdiag(jc, 6, jb) * - (z_qt_times_d(6) - - (ptr_rutri(jc, 1, jb) * p_coeff_view(8, jc, jk, 
jb) + - ptr_rutri(jc, 2, jb) * p_coeff_view(9, jc, jk, jb))); - p_coeff_view(6, jc, jk, jb) = - ptr_rrdiag(jc, 5, jb) * - (z_qt_times_d(5) - - (ptr_rutri(jc, 3, jb) * p_coeff_view(7, jc, jk, jb) + - ptr_rutri(jc, 4, jb) * p_coeff_view(8, jc, jk, jb) + - ptr_rutri(jc, 5, jb) * p_coeff_view(9, jc, jk, jb))); - p_coeff_view(5, jc, jk, jb) = - ptr_rrdiag(jc, 4, jb) * - (z_qt_times_d(4) - - (ptr_rutri(jc, 6, jb) * p_coeff_view(6, jc, jk, jb) + - ptr_rutri(jc, 7, jb) * p_coeff_view(7, jc, jk, jb) + - ptr_rutri(jc, 8, jb) * p_coeff_view(8, jc, jk, jb) + - ptr_rutri(jc, 9, jb) * p_coeff_view(9, jc, jk, jb))); - p_coeff_view(4, jc, jk, jb) = - ptr_rrdiag(jc, 3, jb) * - (z_qt_times_d(3) - - (ptr_rutri(jc, 10, jb) * p_coeff_view(5, jc, jk, jb) + - ptr_rutri(jc, 11, jb) * p_coeff_view(6, jc, jk, jb) + - ptr_rutri(jc, 12, jb) * p_coeff_view(7, jc, jk, jb) + - ptr_rutri(jc, 13, jb) * p_coeff_view(8, jc, jk, jb) + - ptr_rutri(jc, 14, jb) * p_coeff_view(9, jc, jk, jb))); - p_coeff_view(3, jc, jk, jb) = - ptr_rrdiag(jc, 2, jb) * - (z_qt_times_d(2) - - (ptr_rutri(jc, 15, jb) * p_coeff_view(4, jc, jk, jb) + - ptr_rutri(jc, 16, jb) * p_coeff_view(5, jc, jk, jb) + - ptr_rutri(jc, 17, jb) * p_coeff_view(6, jc, jk, jb) + - ptr_rutri(jc, 18, jb) * p_coeff_view(7, jc, jk, jb) + - ptr_rutri(jc, 19, jb) * p_coeff_view(8, jc, jk, jb) + - ptr_rutri(jc, 20, jb) * p_coeff_view(9, jc, jk, jb))); - p_coeff_view(2, jc, jk, jb) = - ptr_rrdiag(jc, 1, jb) * - (z_qt_times_d(1) - - (ptr_rutri(jc, 21, jb) * p_coeff_view(3, jc, jk, jb) + - ptr_rutri(jc, 22, jb) * p_coeff_view(4, jc, jk, jb) + - ptr_rutri(jc, 23, jb) * p_coeff_view(5, jc, jk, jb) + - ptr_rutri(jc, 24, jb) * p_coeff_view(6, jc, jk, jb) + - ptr_rutri(jc, 25, jb) * p_coeff_view(7, jc, jk, jb) + - ptr_rutri(jc, 26, jb) * p_coeff_view(8, jc, jk, jb) + - ptr_rutri(jc, 27, jb) * p_coeff_view(9, jc, jk, jb))); - p_coeff_view(1, jc, jk, jb) = - ptr_rrdiag(jc, 0, jb) * - (z_qt_times_d(0) - - (ptr_rutri(jc, 28, jb) * p_coeff_view(2, jc, jk, jb) + - 
ptr_rutri(jc, 29, jb) * p_coeff_view(3, jc, jk, jb) + - ptr_rutri(jc, 30, jb) * p_coeff_view(4, jc, jk, jb) + - ptr_rutri(jc, 31, jb) * p_coeff_view(5, jc, jk, jb) + - ptr_rutri(jc, 32, jb) * p_coeff_view(6, jc, jk, jb) + - ptr_rutri(jc, 33, jb) * p_coeff_view(7, jc, jk, jb) + - ptr_rutri(jc, 34, jb) * p_coeff_view(8, jc, jk, jb) + - ptr_rutri(jc, 35, jb) * p_coeff_view(9, jc, jk, jb))); - p_coeff_view(0, jc, jk, jb) = - p_cc_view(jc, jk, jb) - - (p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) + - p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) + - p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) + - p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) + - p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4) + - p_coeff_view(6, jc, jk, jb) * lsq_moments_view(jc, jb, 5) + - p_coeff_view(7, jc, jk, jb) * lsq_moments_view(jc, jb, 6) + - p_coeff_view(8, jc, jk, jb) * lsq_moments_view(jc, jb, 7) + - p_coeff_view(9, jc, jk, jb) * lsq_moments_view(jc, jb, 8)); - }); - } - - Kokkos::fence(); -} - -ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_C); - -template <typename T> -void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, - const int *lsq_blk_c, const T *lsq_pseudoinv, - const T *lsq_moments, T *p_coeff, int i_startblk, - int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int patch_id, - bool l_limited_area, - bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c) { - // Wrap raw pointers in unmanaged Kokkos Views. 
- typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedConstT3D; - typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, - Kokkos::MemoryUnmanaged> - UnmanagedConstT4D; - typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT4D; - typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, - Kokkos::MemoryUnmanaged> - UnmanagedConstInt3D; - - Kokkos::View<T *> z_b("z_b", 9); - - UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); - UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); - - UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); - UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); - - UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, - lsq_dim_c, nblks_c); - UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); - - if (patch_id > 0 || l_limited_area) { - for (int jb = i_startblk; jb < i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, - i_endblk, i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<3>> initPolicy( - {slev, i_startidx, 0}, {elev, i_endidx, lsq_dim_unk + 1}); - Kokkos::parallel_for( - "recon_lsq_cell_c_svd_init", initPolicy, - KOKKOS_LAMBDA(const int jk, const int jc, const int ji) { - p_coeff_view(ji, jc, jk, jb) = 0; - }); - } - } - - for (int jb = i_startblk; jb < i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, - i_endblk, i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); - Kokkos::parallel_for( - "recon_lsq_cell_c_svd_inner", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - z_b(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - - p_cc_view(jc, jk, jb); - z_b(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - - p_cc_view(jc, jk, jb); - 
z_b(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - - p_cc_view(jc, jk, jb); - z_b(3) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - - p_cc_view(jc, jk, jb); - z_b(4) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - - p_cc_view(jc, jk, jb); - z_b(5) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - - p_cc_view(jc, jk, jb); - z_b(6) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - - p_cc_view(jc, jk, jb); - z_b(7) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - - p_cc_view(jc, jk, jb); - z_b(8) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - - p_cc_view(jc, jk, jb); - - p_coeff_view(9, jc, jk, jb) = - lsq_pseudoinv_view(jc, 8, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 8, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 8, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 8, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 8, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 8, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 8, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 8, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 8, 8, jb) * z_b(8); - p_coeff_view(8, jc, jk, jb) = - lsq_pseudoinv_view(jc, 7, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 7, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 7, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 7, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 7, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 7, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 7, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 7, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 7, 8, jb) * z_b(8); - p_coeff_view(7, jc, jk, jb) = - lsq_pseudoinv_view(jc, 6, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 6, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 6, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 6, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 6, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 6, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 6, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 6, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 6, 8, jb) * z_b(8); - p_coeff_view(6, jc, jk, jb) = - lsq_pseudoinv_view(jc, 5, 0, 
jb) * z_b(0) + - lsq_pseudoinv_view(jc, 5, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 5, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 5, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 5, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 5, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 5, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 5, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 5, 8, jb) * z_b(8); - p_coeff_view(5, jc, jk, jb) = - lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8); - p_coeff_view(4, jc, jk, jb) = - lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 3, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8); - p_coeff_view(3, jc, jk, jb) = - lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 2, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8); - p_coeff_view(2, jc, jk, jb) = - lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4) + - 
lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 1, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 1, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8); - p_coeff_view(1, jc, jk, jb) = - lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8); - p_coeff_view(0, jc, jk, jb) = - p_cc_view(jc, jk, jb) - - p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - - p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - - p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) - - p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) - - p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4) - - p_coeff_view(6, jc, jk, jb) * lsq_moments_view(jc, jb, 5) - - p_coeff_view(7, jc, jk, jb) * lsq_moments_view(jc, jb, 6) - - p_coeff_view(8, jc, jk, jb) * lsq_moments_view(jc, jb, 7) - - p_coeff_view(9, jc, jk, jb) * lsq_moments_view(jc, jb, 8); - }); - } - - Kokkos::fence(); -} - -ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD); - -template <typename T> -void div3d(const T *vec_e, const int *cell_edge_idx, const int *cell_edge_blk, - const T *geofac_div, T *div_vec_c, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, - bool lacc, int nlev, int nblks_c, int nblks_e) { - // Wrap raw pointers in unmanaged Kokkos Views. 
- typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedConstT3D; - typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT3D; - typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, - Kokkos::MemoryUnmanaged> - UnmanagedConstInt3D; - - UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); - - UnmanagedConstInt3D iidx(cell_edge_idx, nproma, nblks_c, 3); - UnmanagedConstInt3D iblk(cell_edge_blk, nproma, nblks_c, 3); - - UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c); - UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c); - - for (int jb = i_startblk; jb < i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, - i_endblk, i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); - Kokkos::parallel_for( - "div3d_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - div_vec_c_view(jc, jk, jb) = - vec_e_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) * - geofac_div_view(jc, 0, jb) + - vec_e_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) * - geofac_div_view(jc, 1, jb) + - vec_e_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) * - geofac_div_view(jc, 2, jb); - }); - } -} - -ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV3D); - -template <typename T> -void div3d_2field(const T *vec_e, const int *cell_edge_idx, - const int *cell_edge_blk, const T *geofac_div, T *div_vec_c, - const T *in2, T *out2, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, - int nproma, bool lacc, int nlev, int nblks_c, int nblks_e) { - // Wrap raw pointers in unmanaged Kokkos Views. 
- typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedConstT3D; - typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT3D; - typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, - Kokkos::MemoryUnmanaged> - UnmanagedConstInt3D; - - UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); - - UnmanagedConstInt3D iidx(cell_edge_idx, nproma, nblks_c, 3); - UnmanagedConstInt3D iblk(cell_edge_blk, nproma, nblks_c, 3); - - UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c); - UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c); - - UnmanagedConstT3D in2_view(in2, nproma, nlev, nblks_e); - UnmanagedT3D out2_view(out2, nproma, nlev, nblks_c); - - for (int jb = i_startblk; jb < i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, - i_endblk, i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); - Kokkos::parallel_for( - "div3d_2field_inner", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - div_vec_c_view(jc, jk, jb) = - vec_e_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) * - geofac_div_view(jc, 0, jb) + - vec_e_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) * - geofac_div_view(jc, 1, jb) + - vec_e_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) * - geofac_div_view(jc, 2, jb); - - out2_view(jc, jk, jb) = - in2_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) * - geofac_div_view(jc, 0, jb) + - in2_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) * - geofac_div_view(jc, 1, jb) + - in2_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) * - geofac_div_view(jc, 2, jb); - }); - } -} - -ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV3D_2FIELD); - -template <typename T> -void div4d(const int *cell_edge_idx, const int *cell_edge_blk, - const T *geofac_div, const T *f4din, T *f4dout, int dim4d, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - 
const int *slev, const int *elev, int nproma, bool lacc, int nlev, - int nblks_c, int nblks_e) { - // Wrap raw pointers in unmanaged Kokkos Views. - typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedConstT3D; - typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, - Kokkos::MemoryUnmanaged> - UnmanagedConstT4D; - typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT3D; - typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT4D; - typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, - Kokkos::MemoryUnmanaged> - UnmanagedConstInt3D; - - UnmanagedConstInt3D iidx(cell_edge_idx, nproma, nblks_c, 3); - UnmanagedConstInt3D iblk(cell_edge_blk, nproma, nblks_c, 3); - - UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c); - - UnmanagedConstT4D f4din_view(f4din, nproma, nlev, nblks_e, dim4d); - UnmanagedT4D f4dout_view(f4dout, nproma, nlev, nblks_c, dim4d); - - for (int jb = i_startblk; jb < i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, - i_endblk, i_startidx, i_endidx); - - for (int ji = 0; ji < dim4d; ++ji) { - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev[ji], i_startidx}, - {elev[ji], i_endidx}); - Kokkos::parallel_for( - "div4d_inner", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - f4dout_view(jc, jk, jb, ji) = - f4din_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0), ji) * - geofac_div_view(jc, 0, jb) + - f4din_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1), ji) * - geofac_div_view(jc, 1, jb) + - f4din_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2), ji) * - geofac_div_view(jc, 2, jb); - }); - } - } -} - -ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV4D); - -template <typename T> -void div_avg(const T *vec_e, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, const int *cell_edge_idx, - const int *cell_edge_blk, const T *geofac_div, const T 
*avg_coeff, - T *div_vec_c, const T *opt_in2, T *opt_out2, - const int *i_startblk_in, const int *i_endblk_in, - const int *i_startidx_in, const int *i_endidx_in, int slev, - int elev, int nproma, int patch_id, bool l_limited_area, - bool l2fields, bool lacc, int nlev, int nblks_c, int nblks_e) { - // Wrap raw pointers in unmanaged Kokkos Views. - typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedConstT3D; - typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT3D; - typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, - Kokkos::MemoryUnmanaged> - UnmanagedConstInt3D; - - UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); - - UnmanagedConstInt3D inidx(cell_neighbor_idx, nproma, nblks_c, 3); - UnmanagedConstInt3D inblk(cell_neighbor_blk, nproma, nblks_c, 3); - UnmanagedConstInt3D ieidx(cell_edge_idx, nproma, nblks_c, 3); - UnmanagedConstInt3D ieblk(cell_edge_blk, nproma, nblks_c, 3); - - UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 4, nblks_e); - UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma, nlev, nblks_c); - - UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c); - - UnmanagedConstT3D opt_in2_view(opt_in2, nproma, nlev, nblks_e); - UnmanagedT3D opt_out2_view(opt_out2, nproma, nlev, nblks_c); - - Kokkos::View<T ***> aux_c("aux_c", nproma, nlev, nblks_c); - Kokkos::View<T ***> aux_c2("aux_c2", nproma, nlev, nblks_c); - - int i_startblk = i_startblk_in[0]; - int i_endblk = i_endblk_in[0]; - - if (l2fields) { - for (int jb = i_startblk; jb < i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, - i_startblk, i_endblk, i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); - Kokkos::parallel_for( - "div_avg_step1", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - aux_c(jc, jk, jb) = - vec_e_view(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 
0)) * - geofac_div_view(jc, 0, jb) + - vec_e_view(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) * - geofac_div_view(jc, 1, jb) + - vec_e_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) * - geofac_div_view(jc, 2, jb); - aux_c2(jc, jk, jb) = - opt_in2_view(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) * - geofac_div_view(jc, 0, jb) + - opt_in2_view(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) * - geofac_div_view(jc, 1, jb) + - opt_in2_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) * - geofac_div_view(jc, 2, jb); - }); - } - } else { - for (int jb = i_startblk; jb < i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, - i_startblk, i_endblk, i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); - Kokkos::parallel_for( - "div_avg_step2", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - aux_c(jc, jk, jb) = - vec_e_view(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) * - geofac_div_view(jc, 0, jb) + - vec_e_view(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) * - geofac_div_view(jc, 1, jb) + - vec_e_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) * - geofac_div_view(jc, 2, jb); - }); - } - } - - if (patch_id > 0 || l_limited_area) { - i_startblk = i_startblk_in[1]; - i_endblk = i_endblk_in[1]; - - for (int jb = i_startblk; jb < i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, - i_startblk, i_endblk, i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); - Kokkos::parallel_for( - "div_avg_step3", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - div_vec_c_view(jc, jk, jb) = aux_c(jc, jk, jb); - }); - } - - if (l2fields) { - for (int jb = i_startblk; jb < i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, - i_startblk, i_endblk, i_startidx, i_endidx); - - 
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); - Kokkos::parallel_for( - "div_avg_step4", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - opt_out2_view(jc, jk, jb) = aux_c2(jc, jk, jb); - }); - } - } - } - - i_startblk = i_startblk_in[2]; - i_endblk = i_endblk_in[2]; - - if (l2fields) { - for (int jb = i_startblk; jb < i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, - i_startblk, i_endblk, i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); - Kokkos::parallel_for( - "div_avg_step5", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - div_vec_c_view(jc, jk, jb) = - aux_c(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + - aux_c(inidx(jc, jb, 0), jk, inblk(jc, jb, 0)) * - avg_coeff_view(jc, 1, jb) + - aux_c(inidx(jc, jb, 1), jk, inblk(jc, jb, 1)) * - avg_coeff_view(jc, 2, jb) + - aux_c(inidx(jc, jb, 2), jk, inblk(jc, jb, 2)) * - avg_coeff_view(jc, 3, jb); - opt_out2_view(jc, jk, jb) = - aux_c2(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + - aux_c2(inidx(jc, jb, 0), jk, inblk(jc, jb, 0)) * - avg_coeff_view(jc, 1, jb) + - aux_c2(inidx(jc, jb, 1), jk, inblk(jc, jb, 1)) * - avg_coeff_view(jc, 2, jb) + - aux_c2(inidx(jc, jb, 2), jk, inblk(jc, jb, 2)) * - avg_coeff_view(jc, 3, jb); - }); - } - } else { - for (int jb = i_startblk; jb < i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, - i_startblk, i_endblk, i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); - Kokkos::parallel_for( - "div_avg_step6", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - div_vec_c_view(jc, jk, jb) = - aux_c(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + - aux_c(inidx(jc, jb, 0), jk, inblk(jc, jb, 0)) * - avg_coeff_view(jc, 1, jb) + - aux_c(inidx(jc, jb, 1), jk, inblk(jc, jb, 1)) 
* - avg_coeff_view(jc, 2, jb) + - aux_c(inidx(jc, jb, 2), jk, inblk(jc, jb, 2)) * - avg_coeff_view(jc, 3, jb); - }); - } - } -} - -ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV_AVG); - -template <typename T> -void rot_vertex_atmos(const T *vec_e, const int *vert_edge_idx, - const int *vert_edge_blk, const T *geofac_rot, T *rot_vec, - int i_startblk, int i_endblk, int i_startidx_in, - int i_endidx_in, int slev, int elev, int nproma, - bool lacc, int nlev, int nblks_e, int nblks_v) { - // Wrap raw pointers in unmanaged Kokkos Views. - typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedConstT3D; - typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT3D; - typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, - Kokkos::MemoryUnmanaged> - UnmanagedConstInt3D; - - UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); - - UnmanagedConstInt3D iidx(vert_edge_idx, nproma, nblks_v, 6); - UnmanagedConstInt3D iblk(vert_edge_blk, nproma, nblks_v, 6); - - UnmanagedConstT3D geofac_rot_view(geofac_rot, nproma, 6, nblks_v); - - UnmanagedT3D rot_vec_view(rot_vec, nproma, nlev, nblks_v); - - for (int jb = i_startblk; jb < i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, - i_endblk, i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); - Kokkos::parallel_for( - "rot_vertex_atmos_inner", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jv) { - rot_vec_view(jv, jk, jb) = - vec_e_view(iidx(jv, jb, 0), jk, iblk(jv, jb, 0)) * - geofac_rot_view(jv, 0, jb) + - vec_e_view(iidx(jv, jb, 1), jk, iblk(jv, jb, 1)) * - geofac_rot_view(jv, 1, jb) + - vec_e_view(iidx(jv, jb, 2), jk, iblk(jv, jb, 2)) * - geofac_rot_view(jv, 2, jb) + - vec_e_view(iidx(jv, jb, 3), jk, iblk(jv, jb, 3)) * - geofac_rot_view(jv, 3, jb) + - vec_e_view(iidx(jv, jb, 4), jk, iblk(jv, jb, 4)) * - 
geofac_rot_view(jv, 4, jb) + - vec_e_view(iidx(jv, jb, 5), jk, iblk(jv, jb, 5)) * - geofac_rot_view(jv, 5, jb); - }); - } -} - -ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_ROT_VERTEX_ATMOS); - -template <typename T> -void rot_vertex_ri(const T *vec_e, const int *vert_edge_idx, - const int *vert_edge_blk, const T *geofac_rot, T *rot_vec, - int i_startblk, int i_endblk, int i_startidx_in, - int i_endidx_in, int slev, int elev, int nproma, bool lacc, - bool acc_async, int nlev, int nblks_e, int nblks_v) { - // Wrap raw pointers in unmanaged Kokkos Views. - typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedConstT3D; - typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT3D; - typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, - Kokkos::MemoryUnmanaged> - UnmanagedConstInt3D; - - UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); - - UnmanagedConstInt3D iidx(vert_edge_idx, nproma, nblks_v, 6); - UnmanagedConstInt3D iblk(vert_edge_blk, nproma, nblks_v, 6); - - UnmanagedConstT3D geofac_rot_view(geofac_rot, nproma, 6, nblks_v); - - UnmanagedT3D rot_vec_view(rot_vec, nproma, nlev, nblks_v); - - for (int jb = i_startblk; jb < i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, - i_endblk, i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); - Kokkos::parallel_for( - "rot_vertex_atmos_inner", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jv) { - rot_vec_view(jv, jk, jb) = - vec_e_view(iidx(jv, jb, 0), jk, iblk(jv, jb, 0)) * - geofac_rot_view(jv, 0, jb) + - vec_e_view(iidx(jv, jb, 1), jk, iblk(jv, jb, 1)) * - geofac_rot_view(jv, 1, jb) + - vec_e_view(iidx(jv, jb, 2), jk, iblk(jv, jb, 2)) * - geofac_rot_view(jv, 2, jb) + - vec_e_view(iidx(jv, jb, 3), jk, iblk(jv, jb, 3)) * - geofac_rot_view(jv, 3, jb) + - vec_e_view(iidx(jv, jb, 4), jk, 
iblk(jv, jb, 4)) * - geofac_rot_view(jv, 4, jb) + - vec_e_view(iidx(jv, jb, 5), jk, iblk(jv, jb, 5)) * - geofac_rot_view(jv, 5, jb); - }); - } - - if (!acc_async) - Kokkos::fence(); -} - -ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_ROT_VERTEX_RI); diff --git a/src/horizontal/lib_divrot.hpp b/src/horizontal/lib_divrot.hpp deleted file mode 100644 index b8e9743..0000000 --- a/src/horizontal/lib_divrot.hpp +++ /dev/null @@ -1,130 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -#pragma once - -#include <Kokkos_Core.hpp> -#include <types.hpp> - -#define ICONMATH_DECLARE_RECON_LSQ_CELL_L(_type) \ - void recon_lsq_cell_l( \ - const _type *p_cc, const int *cell_neighbor_idx, \ - const int *cell_neighbor_blk, const _type *lsq_qtmat_c, \ - const _type *lsq_rmat_rdiag_c, const _type *lsq_rmat_utri_c, \ - const _type *lsq_moments, _type *p_coeff, int i_startblk, int i_endblk, \ - int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, \ - bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, \ - int lsq_dim_unk, int lsq_dim_c) - -#define ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD(_type) \ - void recon_lsq_cell_l_svd( \ - const _type *p_cc, const int *cell_neighbor_idx, \ - const int *cell_neighbor_blk, const _type *lsq_pseudoinv, \ - const _type *lsq_moments, _type *p_coeff, int i_startblk, int i_endblk, \ - int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, \ - bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, \ - int lsq_dim_unk, int lsq_dim_c) - -#define ICONMATH_DECLARE_RECON_LSQ_CELL_Q(_type) \ - void recon_lsq_cell_q( \ - const _type *p_cc, const int *lsq_idx_c, 
const int *lsq_blk_c, \ - const _type *lsq_qtmat_c, const _type *lsq_rmat_rdiag_c, \ - const _type *lsq_rmat_utri_c, const _type *lsq_moments, _type *p_coeff, \ - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ - int slev, int elev, int nproma, int patch_id, bool l_limited_area, \ - bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) - -#define ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD(_type) \ - void recon_lsq_cell_q_svd( \ - const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ - const _type *lsq_pseudoinv, const _type *lsq_moments, _type *p_coeff, \ - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ - int slev, int elev, int nproma, int patch_id, bool l_limited_area, \ - bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) - -#define ICONMATH_DECLARE_RECON_LSQ_CELL_C(_type) \ - void recon_lsq_cell_c( \ - const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ - const _type *lsq_qtmat_c, const _type *lsq_rmat_rdiag_c, \ - const _type *lsq_rmat_utri_c, const _type *lsq_moments, _type *p_coeff, \ - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ - int slev, int elev, int nproma, int patch_id, bool l_limited_area, \ - bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) - -#define ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD(_type) \ - void recon_lsq_cell_c_svd( \ - const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ - const _type *lsq_pseudoinv, const _type *lsq_moments, _type *p_coeff, \ - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ - int slev, int elev, int nproma, int patch_id, \ - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, \ - int lsq_dim_c) - -#define ICONMATH_DECLARE_DIV3D(_type) \ - void div3d(const _type *vec_e, const int *cell_edge_idx, \ - const int *cell_edge_blk, const _type *geofac_div, \ - _type *div_vec_c, int i_startblk, int i_endblk, \ - int i_startidx_in, int i_endidx_in, int 
slev, int elev, \ - int nproma, bool lacc, int nlev, int nblks_c, int nblks_e) - -#define ICONMATH_DECLARE_DIV3D_2FIELD(_type) \ - void div3d_2field(const _type *vec_e, const int *cell_edge_idx, \ - const int *cell_edge_blk, const _type *geofac_div, \ - _type *div_vec_c, const _type *in2, _type *out2, \ - int i_startblk, int i_endblk, int i_startidx_in, \ - int i_endidx_in, int slev, int elev, int nproma, \ - bool lacc, int nlev, int nblks_c, int nblks_e) - -#define ICONMATH_DECLARE_DIV4D(_type) \ - void div4d(const int *cell_edge_idx, const int *cell_edge_blk, \ - const _type *geofac_div, const _type *f4din, _type *f4dout, \ - int dim4d, int i_startblk, int i_endblk, int i_startidx_in, \ - int i_endidx_in, const int *slev, const int *elev, int nproma, \ - bool lacc, int nlev, int nblks_c, int nblks_e) - -#define ICONMATH_DECLARE_DIV_AVG(_type) \ - void div_avg(const _type *vec_e, const int *cell_neighbor_idx, \ - const int *cell_neighbor_blk, const int *cell_edge_idx, \ - const int *cell_edge_blk, const _type *geofac_div, \ - const _type *avg_coeff, _type *div_vec_c, const _type *opt_in2, \ - _type *opt_out2, const int *i_startblk_in, \ - const int *i_endblk_in, const int *i_startidx_in, \ - const int *i_endidx_in, int slev, int elev, int nproma, \ - int patch_id, bool l_limited_area, bool l2fields, bool lacc, \ - int nlev, int nblks_c, int nblks_e) - -#define ICONMATH_DECLARE_ROT_VERTEX_ATMOS(_type) \ - void rot_vertex_atmos( \ - const _type *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, \ - const _type *geofac_rot, _type *rot_vec, int i_startblk, int i_endblk, \ - int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, \ - bool lacc, int nlev, int nblks_e, int nblks_v) - -#define ICONMATH_DECLARE_ROT_VERTEX_RI(_type) \ - void rot_vertex_ri( \ - const _type *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, \ - const _type *geofac_rot, _type *rot_vec, int i_startblk, int i_endblk, \ - int i_startidx_in, int i_endidx_in, int slev, 
int elev, int nproma, \ - bool lacc, bool acc_async, int nlev, int nblks_e, int nblks_v) - -// Declare as templates -template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_L(T); -template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD(T); -template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_Q(T); -template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD(T); -template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_C(T); -template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD(T); -template <typename T> ICONMATH_DECLARE_DIV3D(T); -template <typename T> ICONMATH_DECLARE_DIV3D_2FIELD(T); -template <typename T> ICONMATH_DECLARE_DIV4D(T); -template <typename T> ICONMATH_DECLARE_DIV_AVG(T); -template <typename T> ICONMATH_DECLARE_ROT_VERTEX_ATMOS(T); -template <typename T> ICONMATH_DECLARE_ROT_VERTEX_RI(T); diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 0a0aba3..6a6e458 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -15,7 +15,7 @@ #include <Kokkos_Core.hpp> #include <gtest/gtest.h> -#include <horizontal/lib_divrot.hpp> +#include <horizontal/mo_lib_divrot.hpp> #include <support/mo_lib_loopindices.hpp> // Template function for computing array size. 
-- GitLab From 7f57b7699279b355c7de746b6b142d4b5d61f945 Mon Sep 17 00:00:00 2001 From: Yen-Chen <yen-chen.chen@tum.de> Date: Mon, 17 Mar 2025 13:53:42 +0100 Subject: [PATCH 73/76] Combine Div tests --- test/c/test_horizontal_divrot.cpp | 40 ++++++++++--------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 6a6e458..f3cb3c8 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -1323,21 +1323,9 @@ protected: } }; -template <typename ValueType> -class HorizontalDiv3DTest : public HorizontalDivTest<ValueType> {}; - -template <typename ValueType> -class HorizontalDiv3D2FTest : public HorizontalDivTest<ValueType> {}; - -template <typename ValueType> -class HorizontalDiv4DTest : public HorizontalDivTest<ValueType> {}; +TYPED_TEST_SUITE(HorizontalDivTest, ValueTypes); -template <typename ValueType> -class HorizontalDivAvgTest : public HorizontalDivTest<ValueType> {}; - -TYPED_TEST_SUITE(HorizontalDiv3DTest, ValueTypes); - -TYPED_TEST(HorizontalDiv3DTest, TestSpecific) { +TYPED_TEST(HorizontalDivTest, TestDiv3DSpecific) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -1391,7 +1379,7 @@ TYPED_TEST(HorizontalDiv3DTest, TestSpecific) { EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6); } -TYPED_TEST(HorizontalDiv3DTest, TestRandom) { +TYPED_TEST(HorizontalDivTest, TestDiv3DRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -1477,9 +1465,7 @@ TYPED_TEST(HorizontalDiv3DTest, TestRandom) { } } -TYPED_TEST_SUITE(HorizontalDiv3D2FTest, ValueTypes); - -TYPED_TEST(HorizontalDiv3D2FTest, TestSpecific) { +TYPED_TEST(HorizontalDivTest, TestDiv3D2FSpecific) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -1549,7 +1535,7 @@ 
TYPED_TEST(HorizontalDiv3D2FTest, TestSpecific) { EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 0)], 6.6, 1e-6); } -TYPED_TEST(HorizontalDiv3D2FTest, TestRandom) { +TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -1667,9 +1653,7 @@ TYPED_TEST(HorizontalDiv3D2FTest, TestRandom) { } } -TYPED_TEST_SUITE(HorizontalDiv4DTest, ValueTypes); - -TYPED_TEST(HorizontalDiv4DTest, TestSpecific) { +TYPED_TEST(HorizontalDivTest, TestDiv4DSpecific) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -1719,7 +1703,7 @@ TYPED_TEST(HorizontalDiv4DTest, TestSpecific) { EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 1)], 2.3, 1e-6); } -TYPED_TEST(HorizontalDiv4DTest, TestDiv4dRandom) { +TYPED_TEST(HorizontalDivTest, TestDiv4DRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -1787,9 +1771,9 @@ TYPED_TEST(HorizontalDiv4DTest, TestDiv4dRandom) { } } -TYPED_TEST_SUITE(HorizontalDivAvgTest, ValueTypes); +TYPED_TEST_SUITE(HorizontalDivTest, ValueTypes); -TYPED_TEST(HorizontalDivAvgTest, TestSpecific) { +TYPED_TEST(HorizontalDivTest, TestDivAvgSpecific) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -1884,7 +1868,7 @@ TYPED_TEST(HorizontalDivAvgTest, TestSpecific) { EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 2.08, 1e-6); } -TYPED_TEST(HorizontalDivAvgTest, TestRandom) { +TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -2079,7 +2063,7 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandom) { } } -TYPED_TEST(HorizontalDivAvgTest, TestSpecificNoL2fields) { +TYPED_TEST(HorizontalDivTest, TestDivAvgSpecificNoL2fields) { constexpr int nproma = this->nproma; constexpr int nlev = 
this->nlev; constexpr int nblks_c = this->nblks_c; @@ -2173,7 +2157,7 @@ TYPED_TEST(HorizontalDivAvgTest, TestSpecificNoL2fields) { EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 0.0, 1e-6); } -TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { +TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; -- GitLab From db821d8cd4c300ecea3195634a22ddd155008dfb Mon Sep 17 00:00:00 2001 From: Yen-Chen <yen-chen.chen@tum.de> Date: Mon, 17 Mar 2025 14:07:16 +0100 Subject: [PATCH 74/76] Split tests into three files --- test/c/test_horizontal_divrot.cpp | 2680 ----------------------------- 1 file changed, 2680 deletions(-) delete mode 100644 test/c/test_horizontal_divrot.cpp diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp deleted file mode 100644 index f3cb3c8..0000000 --- a/test/c/test_horizontal_divrot.cpp +++ /dev/null @@ -1,2680 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -#include <iostream> -#include <random> -#include <vector> - -#include <Kokkos_Core.hpp> -#include <gtest/gtest.h> -#include <horizontal/mo_lib_divrot.hpp> -#include <support/mo_lib_loopindices.hpp> - -// Template function for computing array size. -// For example, we get the array size of a 4-dimensional array A(2, 3, 4, 5) by -// dim_combine(2, 3, 4, 5). -// Which will automatically instantiate -// dim_combine<int, int, int, int>(2, 3, 4, 5). 
-// The function then call dim_combine recursively -// dim_combine<int, int, int, int>(2, 3, 4, 5) { -// return static_cast<size_t>(2) * dim_combine<int, int, int>(3, 4, 5); -// } -// dim_combine<int, int, int>(3, 4, 5) { -// return static_cast<size_t>(3) * dim_combine<int, int>(4, 5); -// } -// dim_combine<int, int>(4, 5) { -// return static_cast<size_t>(4) * dim_combine<int>(5); -// } -// Where the last dim_combine is specialized as -// dim_combine<int>(5) { -// return static_cast<size_t>(5); -// } -// Which gives -// dim_combine<int, int, int, int>(2, 3, 4, 5) = -// static_cast<size_t>(2) * static_cast<size_t>(3) * -// static_cast<size_t>(4) * static_cast<size_t>(5) -/// Template helpers for combining multiple dimension array sizes. -/// The base function of dimension combine. Should not be used. -template <typename... Ts> size_t dim_combine(Ts... dims) { return 0; } -/// Template specialization of only one dimension, returns the dimension itself. -template <typename T> size_t dim_combine(T dim) { - return static_cast<size_t>(dim); -} -/// Template specialization of picking out the first dimension. The combined -/// dimension is the first dimension times the combined dimension of the rest. -template <typename T, typename... Ts> size_t dim_combine(T dim, Ts... dims) { - return static_cast<size_t>(dim) * dim_combine(dims...); -} - -// Template function for LayoutLeft ID access in compile time. -// For example, a multi-dimensional array A of dimensions <2, 3, 4, 5> gets its -// corresponding vector id (LayoutLeft) by -// at<2, 3, 4, 5>(id1, id2, id3, id4). -// The at_impl then adds the id from beginning to the end and pass the id prefix -// to the next recursive at_impl function. 
In this example, -// at<2, 3, 4, 5>(id1, id2, id3, id4) { -// return id1 + at_impl<3, 4, 5>(2, id2, id3, id4); -// } -// at_impl<3, 4, 5>(2, id2, id3, id4) { -// return id2 * 2 + at_impl<4, 5>(2 * 3, id3, id4); -// } -// at_impl<4, 5>(2 * 3, id3, id4) { -// return id3 * 2 * 3 + at_impl<5>(2 * 3 * 4, id4); -// } -// at_impl<5>(2 * 3 * 4, id4) { -// return id4 * 2 * 3 * 4; -// } -// Which gives -// at<2, 3, 4, 5>(id1, id2, id3, id4) = id1 + id2 * 2 + -// id3 * 2 * 3 + id4 * 2 * 3 * 4 -/// Helper type converting integer numbers to int -template <class T, auto> using always_t = T; -/// Base function of at_impl. Should not be used. -template <int... Dims> int at_impl(always_t<int, Dims>... ids) { return 0; } -/// Template specialization of the last ID -template <int LastDim> int at_impl(int prefix, int id) { return id * prefix; } -/// Template specialization of at_impl, accumulate the return value using the -/// first id and pass the prefix to the next recursive at_impl function. -template <int FirstDim, int... Dims> -int at_impl(int prefix, int id, always_t<int, Dims>... ids) { - return id * prefix + at_impl<Dims...>(prefix * FirstDim, ids...); -} -/// at<dim1, dim2, ...>(id1, id2, ...) gets its memory index in vector assuming -/// LayoutLeft. Use this function instead of at_impl. -template <int FirstDim, int... Dims> -int at(int id, always_t<int, Dims>... ids) { - return id + at_impl<Dims...>(FirstDim, ids...); -} - -/// Enum class for the reconstruction method -enum class ReconstructionMethod { - linear, - quadratic, - cubic, -}; - -/// Base test class for the horizontal divrot tests. Templated for the ValueType -/// and ReconMethod for the reconstruction method. -template <typename ValueType, int ReconMethod> -class HorizontalReconTest : public ::testing::Test { -protected: - // lsq_dim_c and lsq_dim_unk are instantiated in compile time. 
- static constexpr std::tuple<int, int> - init_lsq_dim(ReconstructionMethod method) { - switch (method) { - case ReconstructionMethod::linear: - return std::make_tuple(3, 2); - case ReconstructionMethod::quadratic: - return std::make_tuple(9, 5); - case ReconstructionMethod::cubic: - return std::make_tuple(9, 9); - } - } - - // Constant dimensions. - static constexpr int nproma = 3; // inner loop length - static constexpr int nlev = 1; // number of vertical levels - static constexpr int nblks_c = 1; // number of cell blocks (for p_e_in) - static constexpr std::tuple<int, int> lsq_dim = - init_lsq_dim(static_cast<ReconstructionMethod>(ReconMethod)); - static constexpr int lsq_dim_c = std::get<0>(lsq_dim); - static constexpr int lsq_dim_unk = std::get<1>(lsq_dim); - - // Parameter values. - int i_startblk = 0; - int i_endblk = nblks_c; // Test blocks [0 .. nblks_c-1] - int i_startidx_in = 0; - int i_endidx_in = nproma; // Full range: 0 .. nproma-1 - int slev = 0; - int elev = nlev; // Full vertical range (0 .. nlev-1) - int patch_id = 0; - bool lacc = false; // Not using ACC-specific behavior. - bool acc_async = false; // No asynchronous execution. - bool l_consv = true; // With conservative correction. 
- bool l_limited_area = true; // Limited area setup - - std::vector<ValueType> p_cc; - std::vector<int> cell_neighbor_idx; - std::vector<int> cell_neighbor_blk; - std::vector<ValueType> lsq_qtmat_c; - std::vector<ValueType> lsq_rmat_rdiag_c; - std::vector<ValueType> lsq_rmat_utri_c; - std::vector<ValueType> lsq_moments; - std::vector<ValueType> lsq_pseudoinv; - std::vector<ValueType> p_coeff; - - HorizontalReconTest() { - p_cc.resize(dim_combine(nproma, nlev, nblks_c)); - cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, lsq_dim_c)); - cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, lsq_dim_c)); - lsq_qtmat_c.resize(dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c)); - lsq_rmat_rdiag_c.resize(dim_combine(nproma, lsq_dim_unk, nblks_c)); - lsq_rmat_utri_c.resize(dim_combine( - nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c)); - lsq_moments.resize(dim_combine(nproma, nblks_c, lsq_dim_unk)); - lsq_pseudoinv.resize(dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c)); - p_coeff.resize(dim_combine(lsq_dim_unk + 1, nproma, nlev, nblks_c)); - } -}; - -/// Test class for the horizontal tests. The reconstruction method is specified -/// to linear. -template <typename ValueType> -class HorizontalReconLinearTest - : public HorizontalReconTest<ValueType, static_cast<int>( - ReconstructionMethod::linear)> { -}; - -/// Test class for the horizontal tests. The reconstruction method is specified -/// to quadratic. -template <typename ValueType> -class HorizontalReconQuadraticTest - : public HorizontalReconTest< - ValueType, static_cast<int>(ReconstructionMethod::quadratic)> {}; - -/// Test class for the horizontal tests. The reconstruction method is specified -/// to cubic. 
-template <typename ValueType> -class HorizontalReconCubicTest - : public HorizontalReconTest<ValueType, static_cast<int>( - ReconstructionMethod::cubic)> { -}; - -/// ValueTypes which the divrot tests should run with -typedef ::testing::Types<float, double> ValueTypes; - -TYPED_TEST_SUITE(HorizontalReconLinearTest, ValueTypes); - -TYPED_TEST(HorizontalReconLinearTest, TestLsqCell) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; - const auto &rmat_utri_at = - at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; - for (int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; - this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.5; - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; - } - - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = 2.0; - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = 2.0; - this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = 0.1; - - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - } - - // Test function - recon_lsq_cell_l<TypeParam>( - 
this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), - this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, - this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); - - // Check result - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - 0.34, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 1.8, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - 1.0, 1e-6); -} - -TYPED_TEST(HorizontalReconLinearTest, TestLsqCellRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; - const auto &rmat_utri_at = - at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); - - for (int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 
0, j)] = 0; - this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = real_distrib(gen); - this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = real_distrib(gen); - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); - } - - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = real_distrib(gen); - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = real_distrib(gen); - this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = real_distrib(gen); - - this->lsq_moments[moments_at(i, 0, 0)] = real_distrib(gen); - this->lsq_moments[moments_at(i, 0, 1)] = real_distrib(gen); - } - - // Test function - recon_lsq_cell_l<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), - this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, - this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); - - // Compute reference result - std::vector<TypeParam> z_d(lsq_dim_c); - std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); - std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - for (int i = 0; i < lsq_dim_c; ++i) { - z_d[i] = this->p_cc[p_cc_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; - } - z_qt_times_d[0] = 0.0; - z_qt_times_d[1] = 0.0; - for (int i = 0; i < lsq_dim_c; ++i) { - z_qt_times_d[0] += this->lsq_qtmat_c[qtmat_at(jc, 0, i, jb)] * 
z_d[i]; - z_qt_times_d[1] += this->lsq_qtmat_c[qtmat_at(jc, 1, i, jb)] * z_d[i]; - } - p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] = - this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 1, jb)] * z_qt_times_d[1]; - p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] = - this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 0, jb)] * - (z_qt_times_d[0] - - this->lsq_rmat_utri_c[rmat_utri_at(jc, 0, jb)] * - p_result[at<lsq_dim_unk + 1, nproma>(2, jc)]); - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)] - - p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] * - this->lsq_moments[moments_at(jc, jb, 0)] - - p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] * - this->lsq_moments[moments_at(jc, jb, 1)]; - } - } - } - - // Check result - for (int i = 0; i < lsq_dim_unk + 1; ++i) { - for (int jc = 0; jc < nproma; ++jc) { - EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], - p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) - << "For loop result fails for i = " << i << ", jc = " << jc; - } - } -} - -TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVD) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; - for (int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - 
this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; - this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; - } - - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - } - - // Test function - recon_lsq_cell_l_svd<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, - this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); - - // Check result - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - 0.65, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 1.0, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - 0.5, 1e-6); -} - -TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVDRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); - - for 
(int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = real_distrib(gen); - this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = real_distrib(gen); - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); - } - - this->lsq_moments[moments_at(i, 0, 0)] = real_distrib(gen); - this->lsq_moments[moments_at(i, 0, 1)] = real_distrib(gen); - } - - // Test function - recon_lsq_cell_l_svd<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, - this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); - - // Compute reference result - std::vector<TypeParam> z_d(lsq_dim_c); - std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - for (int i = 0; i < lsq_dim_c; ++i) { - z_d[i] = this->p_cc[p_cc_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; - } - p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] = - this->lsq_pseudoinv[pseudoinv_at(jc, 1, 0, jb)] * z_d[0] + - this->lsq_pseudoinv[pseudoinv_at(jc, 1, 1, jb)] * z_d[1] + - this->lsq_pseudoinv[pseudoinv_at(jc, 1, 2, jb)] * z_d[2]; - p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] = - 
this->lsq_pseudoinv[pseudoinv_at(jc, 0, 0, jb)] * z_d[0] + - this->lsq_pseudoinv[pseudoinv_at(jc, 0, 1, jb)] * z_d[1] + - this->lsq_pseudoinv[pseudoinv_at(jc, 0, 2, jb)] * z_d[2]; - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)] - - p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] * - this->lsq_moments[moments_at(jc, jb, 0)] - - p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] * - this->lsq_moments[moments_at(jc, jb, 1)]; - } - } - } - - // Check result - for (int i = 0; i < lsq_dim_unk + 1; ++i) { - for (int jc = 0; jc < nproma; ++jc) { - EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], - p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) - << "For loop result fails for i = " << i << ", jc = " << jc; - } - } -} - -TYPED_TEST_SUITE(HorizontalReconQuadraticTest, ValueTypes); - -TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCell) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; - const auto &rmat_utri_at = - at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; - for (int j = 1; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, 
j)] = 0; - } - - for (int j = 0; j < lsq_dim_c; ++j) { - this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; - this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.5; - this->lsq_qtmat_c[qtmat_at(i, 2, j, 0)] = 0.2; - this->lsq_qtmat_c[qtmat_at(i, 3, j, 0)] = 0.7; - this->lsq_qtmat_c[qtmat_at(i, 4, j, 0)] = 1.3; - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; - } - - for (int j = 0; j < lsq_dim_unk; ++j) { - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = 2.0; - } - - for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { - this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = 1.0; - } - - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - this->lsq_moments[moments_at(i, 0, 2)] = 0.4; - this->lsq_moments[moments_at(i, 0, 3)] = 0.5; - this->lsq_moments[moments_at(i, 0, 4)] = 0.6; - } - - // Test function - recon_lsq_cell_q<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), - this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->patch_id, this->l_limited_area, - this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, - this->lsq_dim_c); - - // Check result - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - 0.24, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 3.2, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - -2.2, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], - 2.8, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], - -3.8, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, 
nproma, nlev, nblks_c>(5, 0, 0, 0))], - 2.6, 1e-6); -} - -TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; - const auto &rmat_utri_at = - at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); - - for (int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - for (int j = 0; j < lsq_dim_unk; ++j) { - for (int k = 0; k < lsq_dim_c; ++k) { - this->lsq_qtmat_c[qtmat_at(i, j, k, 0)] = real_distrib(gen); - } - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = real_distrib(gen); - this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); - } - for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { - this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = real_distrib(gen); - } - - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); - } - } - - // Test function - recon_lsq_cell_q<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), 
this->lsq_qtmat_c.data(), - this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->patch_id, this->l_limited_area, - this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, - this->lsq_dim_c); - - // Compute reference result - std::vector<TypeParam> z_d(lsq_dim_c); - std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); - std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - for (int i = 0; i < lsq_dim_c; ++i) { - z_d[i] = this->p_cc[p_cc_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; - } - for (int j = 0; j < lsq_dim_unk; ++j) { - z_qt_times_d[j] = 0.0; - for (int i = 0; i < lsq_dim_c; ++i) { - z_qt_times_d[j] += - this->lsq_qtmat_c[qtmat_at(jc, j, i, jb)] * z_d[i]; - } - } - int utri_id = 0; - for (int j = lsq_dim_unk; j > 0; --j) { - p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = z_qt_times_d[j - 1]; - for (int k = j + 1; k <= lsq_dim_unk; ++k) { - p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] -= - this->lsq_rmat_utri_c[rmat_utri_at(jc, utri_id++, jb)] * - p_result[at<lsq_dim_unk + 1, nproma>(k, jc)]; - } - p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] *= - this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, j - 1, jb)]; - } - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)]; - for (int j = 0; j < lsq_dim_unk; ++j) { - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= - p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * - 
this->lsq_moments[moments_at(jc, jb, j)]; - } - } - } - } - - // Check result - for (int i = 0; i < lsq_dim_unk + 1; ++i) { - for (int jc = 0; jc < nproma; ++jc) { - EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], - p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) - << "For loop result fails for i = " << i << ", jc = " << jc; - } - } -} - -TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVD) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; - for (int j = 1; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - for (int j = 0; j < lsq_dim_c; ++j) { - this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; - this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; - this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.2; - this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; - this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 1.3; - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; - } - - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - this->lsq_moments[moments_at(i, 0, 2)] = 0.4; - this->lsq_moments[moments_at(i, 0, 3)] = 0.5; - this->lsq_moments[moments_at(i, 0, 4)] = 0.6; 
- } - - // Test function - recon_lsq_cell_q_svd<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->patch_id, this->l_limited_area, - this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, - this->lsq_dim_c); - - // Check result - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - -0.56, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 1.0, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - 0.5, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], - 0.2, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], - 0.7, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], - 1.3, 1e-6); -} - -TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; - const auto &rmat_utri_at = - at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - 
std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); - - // Initialization is done only for iblk = 0 and ilev = 0 - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); - - for (int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - for (int j = 0; j < lsq_dim_unk; ++j) { - for (int k = 0; k < lsq_dim_c; ++k) { - this->lsq_pseudoinv[pseudoinv_at(i, j, k, 0)] = real_distrib(gen); - } - this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); - } - - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); - } - } - - // Test function - recon_lsq_cell_q_svd<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->patch_id, this->l_limited_area, - this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, - this->lsq_dim_c); - - // Compute reference result - std::vector<TypeParam> z_d(lsq_dim_c); - std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); - std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - for (int i = 0; i < lsq_dim_c; ++i) { - z_d[i] = this->p_cc[p_cc_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; - } - for (int j = 1; j < lsq_dim_unk + 1; ++j) { - 
p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0; - for (int i = 0; i < lsq_dim_c; ++i) { - p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] += - this->lsq_pseudoinv[pseudoinv_at(jc, j - 1, i, jb)] * z_d[i]; - } - } - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)]; - for (int j = 0; j < lsq_dim_unk; ++j) { - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= - p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * - this->lsq_moments[moments_at(jc, jb, j)]; - } - } - } - } - - // Check result - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - for (int jc = 0; jc < nproma; ++jc) { - EXPECT_NEAR(this->p_coeff[(p_coeff_at(j, jc, 0, 0))], - p_result[(at<lsq_dim_unk + 1, nproma>(j, jc))], 1e-5) - << "For loop result fails for j = " << j << ", jc = " << jc; - } - } -} - -TYPED_TEST_SUITE(HorizontalReconCubicTest, ValueTypes); - -TYPED_TEST(HorizontalReconCubicTest, TestLsqCell) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; - const auto &rmat_utri_at = - at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; - for (int j = 1; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - 
for (int j = 0; j < lsq_dim_c; ++j) { - this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; - this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.9; - this->lsq_qtmat_c[qtmat_at(i, 2, j, 0)] = 0.8; - this->lsq_qtmat_c[qtmat_at(i, 3, j, 0)] = 0.7; - this->lsq_qtmat_c[qtmat_at(i, 4, j, 0)] = 0.6; - this->lsq_qtmat_c[qtmat_at(i, 5, j, 0)] = 0.5; - this->lsq_qtmat_c[qtmat_at(i, 6, j, 0)] = 0.4; - this->lsq_qtmat_c[qtmat_at(i, 7, j, 0)] = 0.3; - this->lsq_qtmat_c[qtmat_at(i, 8, j, 0)] = 0.2; - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; - } - - for (int j = 0; j < lsq_dim_unk; ++j) { - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = 2.0; - } - - for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { - this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = 1.0; - } - - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - this->lsq_moments[moments_at(i, 0, 2)] = 0.4; - this->lsq_moments[moments_at(i, 0, 3)] = 0.5; - this->lsq_moments[moments_at(i, 0, 4)] = 0.6; - this->lsq_moments[moments_at(i, 0, 5)] = 0.7; - this->lsq_moments[moments_at(i, 0, 6)] = 0.8; - this->lsq_moments[moments_at(i, 0, 7)] = 0.9; - this->lsq_moments[moments_at(i, 0, 8)] = 1.0; - } - - // Test function - recon_lsq_cell_c<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), - this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->patch_id, this->l_limited_area, - this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, - this->lsq_dim_c); - - // Check result - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - 0.28, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 0.4, 1e-6); 
- EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - -0.2, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], - 0.4, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], - -0.2, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], - 0.4, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], - -0.2, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], - 0.4, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], - -0.2, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], - 0.4, 1e-6); -} - -TYPED_TEST(HorizontalReconCubicTest, TestLsqCellRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; - const auto &rmat_utri_at = - at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); - - for (int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = 
int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - for (int j = 0; j < lsq_dim_unk; ++j) { - for (int k = 0; k < lsq_dim_c; ++k) { - this->lsq_qtmat_c[qtmat_at(i, j, k, 0)] = real_distrib(gen); - } - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = real_distrib(gen); - this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); - } - for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { - this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = real_distrib(gen); - } - - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); - } - } - - // Test function - recon_lsq_cell_c<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), - this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->patch_id, this->l_limited_area, - this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, - this->lsq_dim_c); - - // Compute reference result - std::vector<TypeParam> z_d(lsq_dim_c); - std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); - std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - for (int i = 0; i < lsq_dim_c; ++i) { - z_d[i] = this->p_cc[p_cc_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; - } - for (int j = 0; j < lsq_dim_unk; ++j) { - z_qt_times_d[j] = 0.0; - for (int i = 0; i < lsq_dim_c; ++i) { - 
z_qt_times_d[j] += - this->lsq_qtmat_c[qtmat_at(jc, j, i, jb)] * z_d[i]; - } - } - int utri_id = 0; - for (int j = lsq_dim_unk; j > 0; --j) { - p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = z_qt_times_d[j - 1]; - for (int k = j + 1; k <= lsq_dim_unk; ++k) { - p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] -= - this->lsq_rmat_utri_c[rmat_utri_at(jc, utri_id++, jb)] * - p_result[at<lsq_dim_unk + 1, nproma>(k, jc)]; - } - p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] *= - this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, j - 1, jb)]; - } - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)]; - for (int j = 0; j < lsq_dim_unk; ++j) { - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= - p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * - this->lsq_moments[moments_at(jc, jb, j)]; - } - } - } - } - - // Check result - for (int i = 0; i < lsq_dim_unk + 1; ++i) { - for (int jc = 0; jc < nproma; ++jc) { - EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], - p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) - << "For loop result fails for i = " << i << ", jc = " << jc; - } - } -} - -TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVD) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; - for (int j = 1; j < lsq_dim_c; ++j) { - 
this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - for (int j = 0; j < lsq_dim_c; ++j) { - this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; - this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.9; - this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.8; - this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; - this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 0.6; - this->lsq_pseudoinv[pseudoinv_at(i, 5, j, 0)] = 0.5; - this->lsq_pseudoinv[pseudoinv_at(i, 6, j, 0)] = 0.4; - this->lsq_pseudoinv[pseudoinv_at(i, 7, j, 0)] = 0.3; - this->lsq_pseudoinv[pseudoinv_at(i, 8, j, 0)] = 0.2; - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; - } - - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - this->lsq_moments[moments_at(i, 0, 2)] = 0.4; - this->lsq_moments[moments_at(i, 0, 3)] = 0.5; - this->lsq_moments[moments_at(i, 0, 4)] = 0.6; - this->lsq_moments[moments_at(i, 0, 5)] = 0.7; - this->lsq_moments[moments_at(i, 0, 6)] = 0.8; - this->lsq_moments[moments_at(i, 0, 7)] = 0.9; - this->lsq_moments[moments_at(i, 0, 8)] = 1.0; - } - - // Test function - recon_lsq_cell_c_svd<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->patch_id, this->l_limited_area, - this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, - this->lsq_dim_c); - - // Check result - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - -1.64, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 1.0, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - 0.9, 1e-6); - EXPECT_NEAR( - 
this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], - 0.8, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], - 0.7, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], - 0.6, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], - 0.5, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], - 0.4, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], - 0.3, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], - 0.2, 1e-6); -} - -TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); - - for (int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - for (int j = 0; j < lsq_dim_unk; ++j) { - for (int k = 0; k < lsq_dim_c; ++k) { - this->lsq_pseudoinv[pseudoinv_at(i, j, k, 0)] = real_distrib(gen); - } - this->lsq_moments[moments_at(i, 0, j)] = 
real_distrib(gen); - } - - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); - } - } - - // Test function - recon_lsq_cell_c_svd<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->patch_id, this->l_limited_area, - this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, - this->lsq_dim_c); - - // Compute reference result - std::vector<TypeParam> z_d(lsq_dim_c); - std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); - std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); - - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - for (int i = 0; i < lsq_dim_c; ++i) { - z_d[i] = this->p_cc[p_cc_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; - } - for (int j = 1; j < lsq_dim_unk + 1; ++j) { - p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0; - for (int i = 0; i < lsq_dim_c; ++i) { - p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] += - this->lsq_pseudoinv[pseudoinv_at(jc, j - 1, i, jb)] * z_d[i]; - } - } - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)]; - for (int j = 0; j < lsq_dim_unk; ++j) { - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= - p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * - this->lsq_moments[moments_at(jc, jb, j)]; - } - } - } - } - // Check result - for (int i = 0; i < lsq_dim_unk + 1; ++i) { - for (int jc = 0; jc < nproma; ++jc) { - 
EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], - p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) - << "For loop result fails for i = " << i << ", jc = " << jc; - } - } -} - -template <typename ValueType> class HorizontalDivTest : public ::testing::Test { -protected: - static constexpr int nproma = 3; // inner loop length - static constexpr int nlev = 2; // number of vertical levels - static constexpr int nblks_c = 1; // number of cell blocks - static constexpr int nblks_e = 1; // number of edge blocks - static constexpr int dim4d = 2; // 4th dimension size - - int i_startblk = 0; - int i_endblk = nblks_c; // Test blocks [0 .. nblks_c-1] - int i_startidx_in = 0; - int i_endidx_in = nproma; // Full range: 0 .. nproma-1 - std::vector<int> slev; - std::vector<int> elev; - bool lacc = false; // Not using ACC-specific behavior. - - std::vector<ValueType> vec_e; - std::vector<int> cell_edge_idx; - std::vector<int> cell_edge_blk; - std::vector<ValueType> geofac_div; - std::vector<ValueType> div_vec_c; - std::vector<ValueType> f4din; - std::vector<ValueType> f4dout; - - // Followings are needed in HorizontalDivAvgTest - std::vector<int> cell_neighbor_idx; - std::vector<int> cell_neighbor_blk; - std::vector<ValueType> avg_coeff; - std::vector<ValueType> opt_in2; - std::vector<ValueType> opt_out2; - - HorizontalDivTest() { - slev.resize(dim4d, 0); - elev.resize(dim4d, nlev); // Full vertical range (0 .. 
nlev-1) - - vec_e.resize(dim_combine(nproma, nlev, nblks_e)); - cell_edge_idx.resize(dim_combine(nproma, nblks_c, 3)); - cell_edge_blk.resize(dim_combine(nproma, nblks_c, 3)); - geofac_div.resize(dim_combine(nproma, 3, nblks_c)); - div_vec_c.resize(dim_combine(nproma, nlev, nblks_c)); - f4din.resize(dim_combine(nproma, nlev, nblks_e, dim4d)); - f4dout.resize(dim_combine(nproma, nlev, nblks_c, dim4d)); - cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, 3)); - cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, 3)); - avg_coeff.resize(dim_combine(nproma, 4, nblks_c)); - opt_in2.resize(dim_combine(nproma, nlev, nblks_e)); - opt_out2.resize(dim_combine(nproma, nlev, nblks_c)); - } -}; - -TYPED_TEST_SUITE(HorizontalDivTest, ValueTypes); - -TYPED_TEST(HorizontalDivTest, TestDiv3DSpecific) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int nblks_e = this->nblks_e; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &cell_edge_at = at<nproma, nblks_c, 3>; - const auto &geofac_div_at = at<nproma, 3, nblks_c>; - const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; - - // Initialization with specific values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern - } - - // Set edge indices to point to specific cells (including self) - this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; - this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; - this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; - - // All edges are in the same block for this test - for (int j = 0; j < 3; ++j) { - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; - } - - // Geometric factors - this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; - this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; - this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; - - // Initialize div_vec_c to zero - for (int k = 0; k < nlev; ++k) { 
- this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; - } - } - - // Call the div3d function - div3d<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->div_vec_c.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); - - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.4, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.1, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.2, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.2, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6); -} - -TYPED_TEST(HorizontalDivTest, TestDiv3DRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int nblks_e = this->nblks_e; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &cell_edge_at = at<nproma, nblks_c, 3>; - const auto &geofac_div_at = at<nproma, 3, nblks_c>; - const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; - - // Set up random number generators - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); - - // Initialization with random values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); - } - - // Set random edge indices - for (int j = 0; j < 3; ++j) { - this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - this->cell_edge_blk[cell_edge_at(i, 0, j)] = - 0; // Keep in same block for simplicity - } - - // Random geometric factors - for (int j = 0; j < 3; ++j) { - this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); - } - - // 
Initialize div_vec_c to random values - for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); - } - } - - // Call the div3d function - div3d<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->div_vec_c.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); - - // Calculate reference values separately and verify results - std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); - - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; - } - } - } - - // Verify results - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], - ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) - << "Results differ at i=" << i << ", k=" << k; - } - } -} - -TYPED_TEST(HorizontalDivTest, TestDiv3D2FSpecific) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int nblks_e = 
this->nblks_e; - constexpr int dim4d = this->dim4d; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &cell_edge_at = at<nproma, nblks_c, 3>; - const auto &geofac_div_at = at<nproma, 3, nblks_c>; - const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; - const auto &f4d_at = at<nproma, nlev, nblks_e, dim4d>; - const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; - - // Initialization with specific values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern - this->f4din[f4d_at(i, k, 0, 0)] = - (i + 1) * (k + 2); // Different pattern for second field - } - - // Set edge indices to point to specific cells (including self) - this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; - this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; - this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; - - // All edges are in the same block for this test - for (int j = 0; j < 3; ++j) { - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; - } - - // Geometric factors - this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; - this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; - this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; - - // Initialize div_vec_c and f4dout to zero - for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; - this->f4dout[f4dout_at(i, k, 0, 0)] = 0.0; - } - } - - // Call the div3d_2field function - div3d_2field<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->div_vec_c.data(), this->f4din.data(), - this->f4dout.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); - - // Check first field (same as in div3d test) - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 
0)], 3.4, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.1, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.2, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.2, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6); - - // Check second field (expected values calculated manually) - EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 0)], 3.4, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 0)], 5.1, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 0)], 4.2, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 0)], 6.3, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 0)], 4.4, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 0)], 6.6, 1e-6); -} - -TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int nblks_e = this->nblks_e; - constexpr int dim4d = this->dim4d; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &cell_edge_at = at<nproma, nblks_c, 3>; - const auto &geofac_div_at = at<nproma, 3, nblks_c>; - const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; - const auto &f4d_at = at<nproma, nlev, nblks_e, dim4d>; - const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; - - // Set up random number generators - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); - - // Initialization with random values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); - this->f4din[f4d_at(i, k, 0, 0)] = real_distrib(gen); - } - - // Set random edge indices - for (int j = 0; j < 3; ++j) { - this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - this->cell_edge_blk[cell_edge_at(i, 0, j)] = - 0; // Keep in same block for simplicity - } - - // Random 
geometric factors - for (int j = 0; j < 3; ++j) { - this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); - } - - // Initialize div_vec_c and f4dout to random values - for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); - this->f4dout[f4dout_at(i, k, 0, 0)] = real_distrib(gen); - } - } - - // Call the div3d_2field function - div3d_2field<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->div_vec_c.data(), this->f4din.data(), - this->f4dout.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); - - // Calculate reference values separately and verify results - std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); - std::vector<TypeParam> ref_f4dout(nproma * nlev * nblks_c * dim4d, 0.0); - - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - // Calculate reference value for first field - ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; - - // Calculate reference value for second field - ref_f4dout[f4dout_at(jc, jk, jb, 0)] 
= - this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)], - 0)] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)], - 0)] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)], - 0)] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; - } - } - } - - // Verify results for first field - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], - ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) - << "First field results differ at i=" << i << ", k=" << k; - } - } - - // Verify results for second field - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->f4dout[f4dout_at(i, k, 0, 0)], - ref_f4dout[f4dout_at(i, k, 0, 0)], 1e-5) - << "Second field results differ at i=" << i << ", k=" << k; - } - } -} - -TYPED_TEST(HorizontalDivTest, TestDiv4DSpecific) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int nblks_e = this->nblks_e; - constexpr int dim4d = this->dim4d; - - const auto &cell_edge_at = at<nproma, nblks_c, 3>; - const auto &geofac_div_at = at<nproma, 3, nblks_c>; - const auto &f4din_at = at<nproma, nlev, nblks_e, dim4d>; - const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; - - // Initialization - for (int i = 0; i < nproma; ++i) { - for (int j = 0; j < 3; ++j) { - this->cell_edge_idx[cell_edge_at(i, 0, j)] = (i + j) % nproma; - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; - this->geofac_div[geofac_div_at(i, j, 0)] = 0.1 * (j + 1); - } - - for (int k = 0; k < nlev; ++k) { - for (int d = 0; d < dim4d; ++d) { - this->f4din[f4din_at(i, k, 0, d)] = 1.0 + i + k + d; - this->f4dout[f4dout_at(i, k, 0, 
d)] = 0.0; - } - } - } - - // Test function - div4d<TypeParam>(this->cell_edge_idx.data(), this->cell_edge_blk.data(), - this->geofac_div.data(), this->f4din.data(), - this->f4dout.data(), this->dim4d, this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, - this->slev.data(), this->elev.data(), this->nproma, - this->lacc, this->nlev, this->nblks_c, this->nblks_e); - - EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 0)], 1.4, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 0)], 1.1, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 0)], 1.1, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 0)], 2.0, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 0)], 1.7, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 0)], 1.7, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 1)], 2.0, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 1)], 1.7, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 1)], 1.7, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 1)], 2.6, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 1)], 2.3, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 1)], 2.3, 1e-6); -} - -TYPED_TEST(HorizontalDivTest, TestDiv4DRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int nblks_e = this->nblks_e; - constexpr int dim4d = this->dim4d; - - const auto &cell_edge_at = at<nproma, nblks_c, 3>; - const auto &geofac_div_at = at<nproma, 3, nblks_c>; - const auto &f4din_at = at<nproma, nlev, nblks_e, dim4d>; - const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); - - // Initialize with random values - for (int i = 0; i < nproma; ++i) { - for (int j = 0; j < 3; ++j) { - this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - 
this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; - this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); - } - - for (int k = 0; k < nlev; ++k) { - for (int d = 0; d < dim4d; ++d) { - this->f4din[f4din_at(i, k, 0, d)] = real_distrib(gen); - this->f4dout[f4dout_at(i, k, 0, d)] = 0.0; - } - } - } - - // Test function - div4d<TypeParam>(this->cell_edge_idx.data(), this->cell_edge_blk.data(), - this->geofac_div.data(), this->f4din.data(), - this->f4dout.data(), this->dim4d, this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, - this->slev.data(), this->elev.data(), this->nproma, - this->lacc, this->nlev, this->nblks_c, this->nblks_e); - - // Compute reference result and check - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - - for (int ji = 0; ji < dim4d; ++ji) { - for (int jk = this->slev[ji]; jk < this->elev[ji]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - TypeParam expected = 0.0; - for (int je = 0; je < 3; ++je) { - expected += - this->f4din[f4din_at( - this->cell_edge_idx[cell_edge_at(jc, jb, je)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, je)], ji)] * - this->geofac_div[geofac_div_at(jc, je, jb)]; - } - - EXPECT_NEAR(this->f4dout[f4dout_at(jc, jk, jb, ji)], expected, 1e-5) - << "Random test fails at jc=" << jc << ", jk=" << jk - << ", jb=" << jb << ", ji=" << ji; - } - } - } - } -} - -TYPED_TEST_SUITE(HorizontalDivTest, ValueTypes); - -TYPED_TEST(HorizontalDivTest, TestDivAvgSpecific) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int nblks_e = this->nblks_e; - constexpr int dim4d = this->dim4d; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &cell_edge_at = at<nproma, nblks_c, 3>; - const auto &geofac_div_at = at<nproma, 3, nblks_c>; - const 
auto &div_vec_c_at = at<nproma, nlev, nblks_c>; - - // Vectors for additional parameters - // Vectors for block and index ranges - std::vector<int> i_startblk_in(3, 0); - std::vector<int> i_endblk_in(3, nblks_c); - std::vector<int> i_startidx_in(3, 0); - std::vector<int> i_endidx_in(3, nproma); - - // Parameters for the test - int patch_id = 1; - bool l_limited_area = true; - bool l2fields = true; - - const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; - const auto &avg_coeff_at = at<nproma, 4, nblks_c>; - - // Initialize the vectors with specific values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern - this->opt_in2[vec_e_at(i, k, 0)] = - (i + 1) * (k + 1) * 0.5; // Half of vec_e - } - - // Set edge indices to point to specific cells - this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; - this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; - this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; - - // Set neighbor indices similarly - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = i; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = (i + 1) % nproma; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = (i + 2) % nproma; - - // All edges and neighbors are in the same block for this test - for (int j = 0; j < 3; ++j) { - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - // Geometric factors - this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; - this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; - this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; - - // Average coefficients - this->avg_coeff[avg_coeff_at(i, 0, 0)] = 0.4; // Self - this->avg_coeff[avg_coeff_at(i, 1, 0)] = 0.2; // First neighbor - this->avg_coeff[avg_coeff_at(i, 2, 0)] = 0.2; // Second neighbor - this->avg_coeff[avg_coeff_at(i, 3, 0)] = 0.2; // Third neighbor - - // Initialize div_vec_c and opt_out2 to zero - for (int k = 
0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; - this->opt_out2[div_vec_c_at(i, k, 0)] = 0.0; - } - } - - // Call the div_avg function - div_avg<TypeParam>( - this->vec_e.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), - this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), - i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], - this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); - - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.76, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.04, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.08, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.08, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.16, 1e-6); - - EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 0, 0)], 0.94, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 1, 0)], 1.88, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 0, 0)], 1.02, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 2.04, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 1.04, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 2.08, 1e-6); -} - -TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int nblks_e = this->nblks_e; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &cell_edge_at = at<nproma, nblks_c, 3>; - const auto &geofac_div_at = at<nproma, 3, nblks_c>; - const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; - - // Vectors for block and index ranges - std::vector<int> i_startblk_in(3, 0); - 
std::vector<int> i_endblk_in(3, nblks_c); - std::vector<int> i_startidx_in(3, 0); - std::vector<int> i_endidx_in(3, nproma); - - // Parameters for the test - int patch_id = 1; - bool l_limited_area = true; - bool l2fields = true; - - const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; - const auto &avg_coeff_at = at<nproma, 4, nblks_c>; - - // Set up random number generators - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); - - // Initialize with random values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); - this->opt_in2[vec_e_at(i, k, 0)] = real_distrib(gen); - } - - // Set random edge indices - for (int j = 0; j < 3; ++j) { - this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - this->cell_edge_blk[cell_edge_at(i, 0, j)] = - 0; // Keep in same block for simplicity - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = - 0; // Keep in same block for simplicity - } - - // Random geometric factors - for (int j = 0; j < 3; ++j) { - this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); - } - - // Random average coefficients - for (int j = 0; j < 4; ++j) { - this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen); - } - - // Random initial values for div_vec_c and opt_out2 - for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); - this->opt_out2[div_vec_c_at(i, k, 0)] = real_distrib(gen); - } - } - - // Call the div_avg function - div_avg<TypeParam>( - this->vec_e.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), - this->opt_out2.data(), 
i_startblk_in.data(), i_endblk_in.data(), - i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], - this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); - - // Calculate reference values manually - std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c)); - std::vector<TypeParam> aux_c2(dim_combine(nproma, nlev, nblks_c)); - std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, nblks_c)); - std::vector<TypeParam> ref_opt_out2(dim_combine(nproma, nlev, nblks_c)); - - // Step 1: Calculate aux_c and aux_c2 - for (int jb = i_startblk_in[0]; jb < i_endblk_in[0]; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, - i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - aux_c[div_vec_c_at(jc, jk, jb)] = - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; - - aux_c2[div_vec_c_at(jc, jk, jb)] = - this->opt_in2[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->opt_in2[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->opt_in2[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 
2)])] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; - } - } - } - - // Step 2: Assign aux_c to div_vec_c and aux_c2 to opt_out2 for patch_id > 0 - for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, - i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - aux_c[div_vec_c_at(jc, jk, jb)]; - ref_opt_out2[div_vec_c_at(jc, jk, jb)] = - aux_c2[div_vec_c_at(jc, jk, jb)]; - } - } - } - - // Step 3: Perform averaging for the rest of the blocks - for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, - i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - aux_c[div_vec_c_at(jc, jk, jb)] * - this->avg_coeff[avg_coeff_at(jc, 0, jb)] + - aux_c[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * - this->avg_coeff[avg_coeff_at(jc, 1, jb)] + - aux_c[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * - this->avg_coeff[avg_coeff_at(jc, 2, jb)] + - aux_c[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * - this->avg_coeff[avg_coeff_at(jc, 3, jb)]; - - ref_opt_out2[div_vec_c_at(jc, jk, jb)] = - aux_c2[div_vec_c_at(jc, jk, jb)] * - this->avg_coeff[avg_coeff_at(jc, 0, jb)] + - aux_c2[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * - 
this->avg_coeff[avg_coeff_at(jc, 1, jb)] + - aux_c2[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * - this->avg_coeff[avg_coeff_at(jc, 2, jb)] + - aux_c2[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * - this->avg_coeff[avg_coeff_at(jc, 3, jb)]; - } - } - } - - // Verify results - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], - ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) - << "div_vec_c results differ at i=" << i << ", k=" << k; - - EXPECT_NEAR(this->opt_out2[div_vec_c_at(i, k, 0)], - ref_opt_out2[div_vec_c_at(i, k, 0)], 1e-5) - << "opt_out2 results differ at i=" << i << ", k=" << k; - } - } -} - -TYPED_TEST(HorizontalDivTest, TestDivAvgSpecificNoL2fields) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int nblks_e = this->nblks_e; - constexpr int dim4d = this->dim4d; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &cell_edge_at = at<nproma, nblks_c, 3>; - const auto &geofac_div_at = at<nproma, 3, nblks_c>; - const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; - - // Vectors for block and index ranges - std::vector<int> i_startblk_in(3, 0); - std::vector<int> i_endblk_in(3, nblks_c); - std::vector<int> i_startidx_in(3, 0); - std::vector<int> i_endidx_in(3, nproma); - - // Parameters for the test - int patch_id = 1; - bool l_limited_area = true; - bool l2fields = false; - - const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; - const auto &avg_coeff_at = at<nproma, 4, nblks_c>; - - // Initialize the vectors with specific values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern - this->opt_in2[vec_e_at(i, k, 0)] = - (i + 1) * (k 
+ 1) * 0.5; // Half of vec_e - } - - // Set edge indices to point to specific cells - this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; - this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; - this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; - - // Set neighbor indices similarly - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = i; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = (i + 1) % nproma; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = (i + 2) % nproma; - - // All edges and neighbors are in the same block for this test - for (int j = 0; j < 3; ++j) { - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - // Geometric factors - this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; - this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; - this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; - - // Average coefficients - this->avg_coeff[avg_coeff_at(i, 0, 0)] = 0.4; // Self - this->avg_coeff[avg_coeff_at(i, 1, 0)] = 0.2; // First neighbor - this->avg_coeff[avg_coeff_at(i, 2, 0)] = 0.2; // Second neighbor - this->avg_coeff[avg_coeff_at(i, 3, 0)] = 0.2; // Third neighbor - - // Initialize div_vec_c and opt_out2 to zero - for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; - this->opt_out2[div_vec_c_at(i, k, 0)] = 0.0; - } - } - - // Call the div_avg function - div_avg<TypeParam>( - this->vec_e.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), - this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), - i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], - this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); - - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6); - 
EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.76, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.04, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.08, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.08, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.16, 1e-6); - - EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 0, 0)], 0.0, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 1, 0)], 0.0, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 0, 0)], 0.0, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 0.0, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 0.0, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 0.0, 1e-6); -} - -TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int nblks_e = this->nblks_e; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &cell_edge_at = at<nproma, nblks_c, 3>; - const auto &geofac_div_at = at<nproma, 3, nblks_c>; - const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; - - // Vectors for block and index ranges - std::vector<int> i_startblk_in(3, 0); - std::vector<int> i_endblk_in(3, nblks_c); - std::vector<int> i_startidx_in(3, 0); - std::vector<int> i_endidx_in(3, nproma); - - // Parameters for the test - int patch_id = 1; - bool l_limited_area = true; - bool l2fields = false; // Set to false for this test - - const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; - const auto &avg_coeff_at = at<nproma, 4, nblks_c>; - - // Set up random number generators - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); - - // Initialize with random values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = 
real_distrib(gen); - this->opt_in2[vec_e_at(i, k, 0)] = - real_distrib(gen); // Not used but initialize anyway - } - - // Set random edge indices - for (int j = 0; j < 3; ++j) { - this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - this->cell_edge_blk[cell_edge_at(i, 0, j)] = - 0; // Keep in same block for simplicity - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = - 0; // Keep in same block for simplicity - } - - // Random geometric factors - for (int j = 0; j < 3; ++j) { - this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); - } - - // Random average coefficients - for (int j = 0; j < 4; ++j) { - this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen); - } - - // Random initial values for div_vec_c and opt_out2 - for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); - this->opt_out2[div_vec_c_at(i, k, 0)] = - real_distrib(gen); // Not used but initialize anyway - } - } - - // Call the div_avg function with l2fields=false - div_avg<TypeParam>( - this->vec_e.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), - this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), - i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], - this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); - - // Calculate reference values manually - std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c)); - std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, nblks_c)); - - // Step 1: Calculate aux_c (but not aux_c2 since l2fields=false) - for (int jb = i_startblk_in[0]; jb < i_endblk_in[0]; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], 
nproma, jb, - i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - aux_c[div_vec_c_at(jc, jk, jb)] = - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; - } - } - } - - // Step 2: Assign aux_c to div_vec_c for patch_id > 0 (opt_out2 not updated - // since l2fields=false) - for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, - i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - aux_c[div_vec_c_at(jc, jk, jb)]; - } - } - } - - // Step 3: Perform averaging for the rest of the blocks (only for div_vec_c, - // not opt_out2) - for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, - i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - aux_c[div_vec_c_at(jc, jk, jb)] * - this->avg_coeff[avg_coeff_at(jc, 0, jb)] + - aux_c[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * - 
this->avg_coeff[avg_coeff_at(jc, 1, jb)] + - aux_c[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * - this->avg_coeff[avg_coeff_at(jc, 2, jb)] + - aux_c[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * - this->avg_coeff[avg_coeff_at(jc, 3, jb)]; - } - } - } - - // Verify results - only check div_vec_c since l2fields=false means opt_out2 - // isn't updated - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], - ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) - << "div_vec_c results differ at i=" << i << ", k=" << k; - } - } -} - -template <typename ValueType> -class HorizontalRotVertexTest : public ::testing::Test { -protected: - static constexpr int nproma = 3; // inner loop length - static constexpr int nlev = 2; // number of vertical levels - static constexpr int nblks_e = 1; // number of edge blocks - static constexpr int nblks_v = 1; // number of vertex blocks - static constexpr int dim4d = 2; // 4th dimension size - - int i_startblk = 0; - int i_endblk = nblks_v; // Test blocks [0 .. nblks_v-1] - int i_startidx_in = 0; - int i_endidx_in = nproma; // Full range: 0 .. nproma-1 - std::vector<int> slev; - std::vector<int> elev; - bool lacc = false; // Not using ACC-specific behavior. - bool acc_async = false; // Not using ACC-specific behavior. - - std::vector<ValueType> vec_e; - std::vector<int> vert_edge_idx; - std::vector<int> vert_edge_blk; - std::vector<ValueType> geofac_rot; - std::vector<ValueType> rot_vec; - std::vector<ValueType> f4din; - std::vector<ValueType> f4dout; - - HorizontalRotVertexTest() { - slev.resize(dim4d, 0); - elev.resize(dim4d, nlev); // Full vertical range (0 .. 
nlev-1) - - vec_e.resize(dim_combine(nproma, nlev, nblks_e)); - vert_edge_idx.resize(dim_combine(nproma, nblks_v, 6)); - vert_edge_blk.resize(dim_combine(nproma, nblks_v, 6)); - geofac_rot.resize(dim_combine(nproma, 6, nblks_v)); - rot_vec.resize(dim_combine(nproma, nlev, nblks_v)); - f4din.resize(dim_combine(nproma, nlev, nblks_e, dim4d)); - f4dout.resize(dim_combine(nproma, nlev, nblks_v, dim4d)); - } -}; - -template <typename ValueType> -class HorizontalRotVertexAtmosTest : public HorizontalRotVertexTest<ValueType> { -}; - -template <typename ValueType> -class HorizontalRotVertexRITest : public HorizontalRotVertexTest<ValueType> {}; - -TYPED_TEST_SUITE(HorizontalRotVertexAtmosTest, ValueTypes); - -TYPED_TEST(HorizontalRotVertexAtmosTest, TestSpecific) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_e = this->nblks_e; - constexpr int nblks_v = this->nblks_v; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &vert_edge_at = at<nproma, nblks_v, 6>; - const auto &geofac_rot_at = at<nproma, 6, nblks_v>; - const auto &rot_vec_at = at<nproma, nlev, nblks_v>; - - // Initialization with specific values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern - } - - // Set edge indices to point to specific edges - for (int j = 0; j < 6; ++j) { - this->vert_edge_idx[vert_edge_at(i, 0, j)] = (i + j) % nproma; - // All edges are in the same block for this test - this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; - } - - // Geometric factors for rotation - this->geofac_rot[geofac_rot_at(i, 0, 0)] = 0.3; - this->geofac_rot[geofac_rot_at(i, 1, 0)] = 0.2; - this->geofac_rot[geofac_rot_at(i, 2, 0)] = 0.1; - this->geofac_rot[geofac_rot_at(i, 3, 0)] = 0.2; - this->geofac_rot[geofac_rot_at(i, 4, 0)] = 0.1; - this->geofac_rot[geofac_rot_at(i, 5, 0)] = 0.1; - - // Initialize rot_vec to zero - for (int k = 0; k < nlev; ++k) { - 
this->rot_vec[rot_vec_at(i, k, 0)] = 0.0; - } - } - - // Call the rot_vertex_atmos function - rot_vertex_atmos<TypeParam>( - this->vec_e.data(), this->vert_edge_idx.data(), - this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), - this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, - this->slev[0], this->elev[0], this->nproma, this->lacc, this->nlev, - this->nblks_e, this->nblks_v); - - // Expected values based on the initialization pattern - EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 0, 0)], 1.7, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 1, 0)], 3.4, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 0, 0)], 2.1, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 1, 0)], 4.2, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 0, 0)], 2.2, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 1, 0)], 4.4, 1e-6); -} - -TYPED_TEST(HorizontalRotVertexAtmosTest, TestRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_e = this->nblks_e; - constexpr int nblks_v = this->nblks_v; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &vert_edge_at = at<nproma, nblks_v, 6>; - const auto &geofac_rot_at = at<nproma, 6, nblks_v>; - const auto &rot_vec_at = at<nproma, nlev, nblks_v>; - - // Set up random number generators - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); - - // Initialization with random values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); - } - - // Set random edge indices - for (int j = 0; j < 6; ++j) { - this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen); - this->vert_edge_blk[vert_edge_at(i, 0, j)] = - 0; // Keep in same block for simplicity - } - - // Random geometric factors - for (int j = 0; j < 6; ++j) { - 
this->geofac_rot[geofac_rot_at(i, j, 0)] = real_distrib(gen); - } - - // Initialize rot_vec to random values - for (int k = 0; k < nlev; ++k) { - this->rot_vec[rot_vec_at(i, k, 0)] = real_distrib(gen); - } - } - - // Call the rot_vertex_atmos function - rot_vertex_atmos<TypeParam>( - this->vec_e.data(), this->vert_edge_idx.data(), - this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), - this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, - this->slev[0], this->elev[0], this->nproma, this->lacc, this->nlev, - this->nblks_e, this->nblks_v); - - // Calculate reference values separately and verify results - std::vector<TypeParam> ref_rot_vec(nproma * nlev * nblks_v, 0.0); - - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_v_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jv = i_startidx; jv < i_endidx; ++jv) { - ref_rot_vec[rot_vec_at(jv, jk, jb)] = - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * - this->geofac_rot[geofac_rot_at(jv, 0, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * - this->geofac_rot[geofac_rot_at(jv, 1, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * - this->geofac_rot[geofac_rot_at(jv, 2, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * - this->geofac_rot[geofac_rot_at(jv, 3, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * - this->geofac_rot[geofac_rot_at(jv, 4, jb)] + - this->vec_e[vec_e_at( - 
this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * - this->geofac_rot[geofac_rot_at(jv, 5, jb)]; - } - } - } - - // Verify results - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->rot_vec[rot_vec_at(i, k, 0)], - ref_rot_vec[rot_vec_at(i, k, 0)], 1e-5) - << "Results differ at i=" << i << ", k=" << k; - } - } -} - -TYPED_TEST_SUITE(HorizontalRotVertexRITest, ValueTypes); - -TYPED_TEST(HorizontalRotVertexRITest, TestSpecific) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_e = this->nblks_e; - constexpr int nblks_v = this->nblks_v; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &vert_edge_at = at<nproma, nblks_v, 6>; - const auto &geofac_rot_at = at<nproma, 6, nblks_v>; - const auto &rot_vec_at = at<nproma, nlev, nblks_v>; - - // Initialization with specific values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern - } - - // Set edge indices to point to specific edges - for (int j = 0; j < 6; ++j) { - this->vert_edge_idx[vert_edge_at(i, 0, j)] = (i + j) % nproma; - // All edges are in the same block for this test - this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; - } - - // Geometric factors for rotation - this->geofac_rot[geofac_rot_at(i, 0, 0)] = 0.3; - this->geofac_rot[geofac_rot_at(i, 1, 0)] = 0.2; - this->geofac_rot[geofac_rot_at(i, 2, 0)] = 0.1; - this->geofac_rot[geofac_rot_at(i, 3, 0)] = 0.2; - this->geofac_rot[geofac_rot_at(i, 4, 0)] = 0.1; - this->geofac_rot[geofac_rot_at(i, 5, 0)] = 0.1; - - // Initialize rot_vec to zero - for (int k = 0; k < nlev; ++k) { - this->rot_vec[rot_vec_at(i, k, 0)] = 0.0; - } - } - - // Call the rot_vertex_ri function - rot_vertex_ri<TypeParam>( - this->vec_e.data(), this->vert_edge_idx.data(), - this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), - 
this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, - this->slev[0], this->elev[0], this->nproma, this->lacc, this->acc_async, - this->nlev, this->nblks_e, this->nblks_v); - - // Expected values based on the initialization pattern - EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 0, 0)], 1.7, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 1, 0)], 3.4, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 0, 0)], 2.1, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 1, 0)], 4.2, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 0, 0)], 2.2, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 1, 0)], 4.4, 1e-6); -} - -TYPED_TEST(HorizontalRotVertexRITest, TestRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_e = this->nblks_e; - constexpr int nblks_v = this->nblks_v; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &vert_edge_at = at<nproma, nblks_v, 6>; - const auto &geofac_rot_at = at<nproma, 6, nblks_v>; - const auto &rot_vec_at = at<nproma, nlev, nblks_v>; - - // Set up random number generators - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); - - // Initialization with random values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); - } - - // Set random edge indices - for (int j = 0; j < 6; ++j) { - this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen); - this->vert_edge_blk[vert_edge_at(i, 0, j)] = - 0; // Keep in same block for simplicity - } - - // Random geometric factors - for (int j = 0; j < 6; ++j) { - this->geofac_rot[geofac_rot_at(i, j, 0)] = real_distrib(gen); - } - - // Initialize rot_vec to random values - for (int k = 0; k < nlev; ++k) { - this->rot_vec[rot_vec_at(i, k, 0)] = real_distrib(gen); - } - } - - // Call the rot_vertex_ri function - 
rot_vertex_ri<TypeParam>( - this->vec_e.data(), this->vert_edge_idx.data(), - this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), - this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, - this->slev[0], this->elev[0], this->nproma, this->lacc, this->acc_async, - this->nlev, this->nblks_e, this->nblks_v); - - // Ensure computation is complete for both modes - Kokkos::fence(); - - // Calculate reference values separately and verify results - std::vector<TypeParam> ref_rot_vec(nproma * nlev * nblks_v, 0.0); - - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_v_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jv = i_startidx; jv < i_endidx; ++jv) { - ref_rot_vec[rot_vec_at(jv, jk, jb)] = - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * - this->geofac_rot[geofac_rot_at(jv, 0, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * - this->geofac_rot[geofac_rot_at(jv, 1, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * - this->geofac_rot[geofac_rot_at(jv, 2, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * - this->geofac_rot[geofac_rot_at(jv, 3, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * - this->geofac_rot[geofac_rot_at(jv, 4, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * - this->geofac_rot[geofac_rot_at(jv, 5, jb)]; - } - } - } - - // Verify 
results - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->rot_vec[rot_vec_at(i, k, 0)], - ref_rot_vec[rot_vec_at(i, k, 0)], 1e-5) - << "Results differ at i=" << i << ", k=" << k << ")"; - } - } -} -- GitLab From d93107b683c253d94b937171fcba6f571f22da04 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Wed, 12 Mar 2025 15:58:19 +0100 Subject: [PATCH 75/76] Inner product using lambda functions --- src/horizontal/CMakeLists.txt | 4 +- src/horizontal/mo_lib_divrot.cpp | 71 ++++++++++++-------------------- 2 files changed, 29 insertions(+), 46 deletions(-) diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt index f3b75c0..a09fdc2 100644 --- a/src/horizontal/CMakeLists.txt +++ b/src/horizontal/CMakeLists.txt @@ -22,7 +22,9 @@ set(Fortran_MODULE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/mod") set_target_properties( iconmath-horizontal PROPERTIES Fortran_MODULE_DIRECTORY "${Fortran_MODULE_DIRECTORY}" - EXPORT_NAME ${PROJECT_NAME}::horizontal) + EXPORT_NAME ${PROJECT_NAME}::horizontal + CXX_STANDARD 20 + CXX_STANDARD_REQUIRED ON) if(IM_ENABLE_LOOP_EXCHANGE) target_compile_definitions(iconmath-horizontal PRIVATE __LOOP_EXCHANGE) diff --git a/src/horizontal/mo_lib_divrot.cpp b/src/horizontal/mo_lib_divrot.cpp index d086e8b..b33df79 100644 --- a/src/horizontal/mo_lib_divrot.cpp +++ b/src/horizontal/mo_lib_divrot.cpp @@ -15,6 +15,18 @@ #include <horizontal/mo_lib_divrot.hpp> #include <support/mo_lib_loopindices.hpp> +#define DECLARE_LAMBDA_INNER_PRODUCT(_func_name, _output, _id, _lambda) \ + auto inner_product = [=, &_output](int _id, auto &&...ts) { \ + return [=, &_output] { \ + _output(_id) = 0.0; \ + int dummy[sizeof...(ts)]{(_lambda, 0)...}; \ + }; \ + }; \ + auto _func_name = [=]<int... 
Is>(int _id, \ + std::integer_sequence<int, Is...>) { \ + return inner_product(_id, Is...)(); \ + }; + template <typename T> void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, const int *cell_neighbor_blk, const T *lsq_qtmat_c, @@ -261,51 +273,20 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::parallel_for( "recon_lsq_cell_q_step2", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 
3, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk); + auto lambda_add = [=, &z_qt_times_d](auto lsq_qtmat_c_view, auto z_d, + int jb, int jk, int jc, int unk, + int i) { + z_qt_times_d(unk) += + lsq_qtmat_c_view(jc, unk, i, jb) * z_d(i, jc, jk); + }; + DECLARE_LAMBDA_INNER_PRODUCT( + dot_product, z_qt_times_d, unk, + lambda_add(lsq_qtmat_c_view, z_d, jb, jk, jc, unk, ts)); + dot_product(0, std::make_integer_sequence<int, 9>()); + dot_product(1, std::make_integer_sequence<int, 9>()); + dot_product(2, std::make_integer_sequence<int, 9>()); + dot_product(3, std::make_integer_sequence<int, 9>()); + dot_product(4, std::make_integer_sequence<int, 9>()); p_coeff_view(5, jc, jk, jb) = ptr_rrdiag(jc, 4, jb) * z_qt_times_d(4); p_coeff_view(4, jc, jk, jb) = -- GitLab From c86e24d242919bd3687d3048bf5d7b3eff4cacd3 Mon Sep 17 00:00:00 2001 From: Yen-Chen <yen-chen.chen@tum.de> Date: Mon, 17 Mar 2025 11:06:46 +0100 Subject: [PATCH 76/76] Reorganize lambda functions so that it is easier to understand --- src/horizontal/mo_lib_divrot.cpp | 311 ++++++++----------------------- 1 file changed, 73 insertions(+), 238 deletions(-) diff --git a/src/horizontal/mo_lib_divrot.cpp 
b/src/horizontal/mo_lib_divrot.cpp index b33df79..c09dfcc 100644 --- a/src/horizontal/mo_lib_divrot.cpp +++ b/src/horizontal/mo_lib_divrot.cpp @@ -15,16 +15,10 @@ #include <horizontal/mo_lib_divrot.hpp> #include <support/mo_lib_loopindices.hpp> -#define DECLARE_LAMBDA_INNER_PRODUCT(_func_name, _output, _id, _lambda) \ - auto inner_product = [=, &_output](int _id, auto &&...ts) { \ - return [=, &_output] { \ - _output(_id) = 0.0; \ - int dummy[sizeof...(ts)]{(_lambda, 0)...}; \ - }; \ - }; \ - auto _func_name = [=]<int... Is>(int _id, \ +#define DECLARE_INTEGER_SEQUENCE_WRAPPER_W_ID(_func_name, _base_function) \ + auto _func_name = [=]<int... Is>(int unk, \ std::integer_sequence<int, Is...>) { \ - return inner_product(_id, Is...)(); \ + return _base_function(unk, Is...)(); \ }; template <typename T> @@ -273,15 +267,18 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::parallel_for( "recon_lsq_cell_q_step2", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - auto lambda_add = [=, &z_qt_times_d](auto lsq_qtmat_c_view, auto z_d, - int jb, int jk, int jc, int unk, - int i) { - z_qt_times_d(unk) += - lsq_qtmat_c_view(jc, unk, i, jb) * z_d(i, jc, jk); + auto inner_product = [=, &z_qt_times_d](int unk, auto &&...ts) { + return [=, &z_qt_times_d] { + z_qt_times_d(unk) = 0.0; + int dummy[sizeof...(ts)]{( + [=, &z_qt_times_d](int i) { + z_qt_times_d(unk) += + lsq_qtmat_c_view(jc, unk, i, jb) * z_d(i, jc, jk); + }(ts), + 0)...}; + }; }; - DECLARE_LAMBDA_INNER_PRODUCT( - dot_product, z_qt_times_d, unk, - lambda_add(lsq_qtmat_c_view, z_d, jb, jk, jc, unk, ts)); + DECLARE_INTEGER_SEQUENCE_WRAPPER_W_ID(dot_product, inner_product); dot_product(0, std::make_integer_sequence<int, 9>()); dot_product(1, std::make_integer_sequence<int, 9>()); dot_product(2, std::make_integer_sequence<int, 9>()); @@ -401,56 +398,23 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, Kokkos::parallel_for( "recon_lsq_cell_q_svd_step2", 
innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - p_coeff_view(5, jc, jk, jb) = - lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8, jc, jk); - p_coeff_view(4, jc, jk, jb) = - lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 3, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8, jc, jk); - p_coeff_view(3, jc, jk, jb) = - lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 2, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8, jc, jk); - p_coeff_view(2, jc, jk, jb) = - lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 1, 6, jb) 
* z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 1, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8, jc, jk); - p_coeff_view(1, jc, jk, jb) = - lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8, jc, jk); + auto inner_product = [=, &p_coeff_view](int unk, auto &&...ts) { + return [=, &p_coeff_view] { + p_coeff_view(unk + 1, jc, jk, jb) = 0.0; + int dummy[sizeof...(ts)]{( + [=, &p_coeff_view](int i) { + p_coeff_view(unk + 1, jc, jk, jb) += + lsq_pseudoinv_view(jc, unk, i, jb) * z_b(i, jc, jk); + }(ts), + 0)...}; + }; + }; + DECLARE_INTEGER_SEQUENCE_WRAPPER_W_ID(dot_product, inner_product); + dot_product(4, std::make_integer_sequence<int, 9>()); + dot_product(3, std::make_integer_sequence<int, 9>()); + dot_product(2, std::make_integer_sequence<int, 9>()); + dot_product(1, std::make_integer_sequence<int, 9>()); + dot_product(0, std::make_integer_sequence<int, 9>()); p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb) - p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - @@ -546,87 +510,27 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::parallel_for( "recon_lsq_cell_c_step2", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 0, 6, jb) 
* z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 4, 7, jb) * 
z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(5) = lsq_qtmat_c_view(jc, 5, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 5, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 5, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 5, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 5, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 5, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 5, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 5, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 5, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(6) = lsq_qtmat_c_view(jc, 6, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 6, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 6, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 6, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 6, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 6, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 6, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 6, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 6, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(7) = lsq_qtmat_c_view(jc, 7, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 7, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 7, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 7, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 7, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 7, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 7, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 7, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 7, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(8) = lsq_qtmat_c_view(jc, 8, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 8, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 8, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 8, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 8, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 8, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 8, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 8, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 8, 8, jb) * z_d(8, 
jc, jk); + auto inner_product = [=, &z_qt_times_d](int unk, auto &&...ts) { + return [=, &z_qt_times_d] { + z_qt_times_d(unk) = 0.0; + int dummy[sizeof...(ts)]{( + [=, &z_qt_times_d](int i) { + z_qt_times_d(unk) += + lsq_qtmat_c_view(jc, unk, i, jb) * z_d(i, jc, jk); + }(ts), + 0)...}; + }; + }; + DECLARE_INTEGER_SEQUENCE_WRAPPER_W_ID(dot_product, inner_product); + dot_product(0, std::make_integer_sequence<int, 9>()); + dot_product(1, std::make_integer_sequence<int, 9>()); + dot_product(2, std::make_integer_sequence<int, 9>()); + dot_product(3, std::make_integer_sequence<int, 9>()); + dot_product(4, std::make_integer_sequence<int, 9>()); + dot_product(5, std::make_integer_sequence<int, 9>()); + dot_product(6, std::make_integer_sequence<int, 9>()); + dot_product(7, std::make_integer_sequence<int, 9>()); + dot_product(8, std::make_integer_sequence<int, 9>()); p_coeff_view(9, jc, jk, jb) = ptr_rrdiag(jc, 8, jb) * z_qt_times_d(8); p_coeff_view(8, jc, jk, jb) = @@ -786,96 +690,27 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, z_b(8) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - p_cc_view(jc, jk, jb); - p_coeff_view(9, jc, jk, jb) = - lsq_pseudoinv_view(jc, 8, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 8, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 8, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 8, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 8, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 8, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 8, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 8, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 8, 8, jb) * z_b(8); - p_coeff_view(8, jc, jk, jb) = - lsq_pseudoinv_view(jc, 7, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 7, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 7, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 7, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 7, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 7, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 7, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 7, 7, jb) * z_b(7) + - 
lsq_pseudoinv_view(jc, 7, 8, jb) * z_b(8); - p_coeff_view(7, jc, jk, jb) = - lsq_pseudoinv_view(jc, 6, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 6, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 6, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 6, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 6, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 6, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 6, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 6, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 6, 8, jb) * z_b(8); - p_coeff_view(6, jc, jk, jb) = - lsq_pseudoinv_view(jc, 5, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 5, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 5, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 5, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 5, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 5, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 5, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 5, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 5, 8, jb) * z_b(8); - p_coeff_view(5, jc, jk, jb) = - lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8); - p_coeff_view(4, jc, jk, jb) = - lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 3, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8); - p_coeff_view(3, jc, jk, jb) = - lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2) + - 
lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 2, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8); - p_coeff_view(2, jc, jk, jb) = - lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 1, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 1, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8); - p_coeff_view(1, jc, jk, jb) = - lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8); + auto inner_product = [=, &p_coeff_view](int unk, auto &&...ts) { + return [=, &p_coeff_view] { + p_coeff_view(unk + 1, jc, jk, jb) = 0.0; + int dummy[sizeof...(ts)]{( + [=, &p_coeff_view](int i) { + p_coeff_view(unk + 1, jc, jk, jb) += + lsq_pseudoinv_view(jc, unk, i, jb) * z_b(i); + }(ts), + 0)...}; + }; + }; + DECLARE_INTEGER_SEQUENCE_WRAPPER_W_ID(dot_product, inner_product); + dot_product(8, std::make_integer_sequence<int, 9>()); + dot_product(7, std::make_integer_sequence<int, 9>()); + dot_product(6, std::make_integer_sequence<int, 9>()); + dot_product(5, std::make_integer_sequence<int, 9>()); + dot_product(4, std::make_integer_sequence<int, 9>()); + dot_product(3, std::make_integer_sequence<int, 9>()); + dot_product(2, std::make_integer_sequence<int, 9>()); + dot_product(1, std::make_integer_sequence<int, 9>()); + 
dot_product(0, std::make_integer_sequence<int, 9>()); p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb) - p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - -- GitLab