From 0ce09afcade6c639b39734678e5a1cd04a2285da Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 31 Dec 2024 08:52:24 +0100 Subject: [PATCH 01/50] added unittests for mo_lib_loopindices --- test/fortran/test_loopindices.f90 | 145 ++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 test/fortran/test_loopindices.f90 diff --git a/test/fortran/test_loopindices.f90 b/test/fortran/test_loopindices.f90 new file mode 100644 index 0000000..3c20630 --- /dev/null +++ b/test/fortran/test_loopindices.f90 @@ -0,0 +1,145 @@ +! ICON +! +! --------------------------------------------------------------- +! Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +! Contact information: icon-model.org +! +! See AUTHORS.TXT for a list of authors +! See LICENSES/ for license information +! SPDX-License-Identifier: BSD-3-Clause +! --------------------------------------------------------------- + +MODULE TEST_mo_lib_loopindices + USE FORTUTF + + IMPLICIT NONE + + PRIVATE + + PUBLIC :: TEST_get_indices_lib + +CONTAINS + + SUBROUTINE TEST_get_indices_lib + USE mo_lib_loopindices, ONLY: get_indices_c_lib, get_indices_e_lib, get_indices_v_lib + + INTEGER :: i_startidx_in ! Start index as input + INTEGER :: i_endidx_in ! End index as input + INTEGER :: nproma ! inner loop length/vector length + INTEGER :: i_blk ! Current block (variable jb in do loops) + INTEGER :: i_startblk ! Start block of do loop + INTEGER :: i_endblk ! End block of do loop + + INTEGER :: i_startidx_out, i_endidx_out ! Start and end indices (jc loop), as output + + i_startidx_in = 2 + i_endidx_in = 15 + nproma = 32 + i_startblk = 1 + i_endblk = 40 + + ! CASE: I -> i_blk == i_startblk + i_blk = 1 + CALL get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) + + CALL TAG_TEST("TEST_get_indices_c_start_1") + CALL ASSERT_EQUAL(i_startidx_out, MAX(1, i_startidx_in)) + CALL TAG_TEST("TEST_get_indices_c_end_1") + CALL ASSERT_EQUAL(i_endidx_out, nproma) + + i_startidx_out = 0 + i_endidx_out = 0 + + CALL get_indices_e_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) + + CALL TAG_TEST("TEST_get_indices_e_start_1") + CALL ASSERT_EQUAL(i_startidx_out, MAX(1, i_startidx_in)) + CALL TAG_TEST("TEST_get_indices_e_end_1") + CALL ASSERT_EQUAL(i_endidx_out, nproma) + + i_startidx_out = 0 + i_endidx_out = 0 + + CALL get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) + + CALL TAG_TEST("TEST_get_indices_v_start_1") + CALL ASSERT_EQUAL(i_startidx_out, MAX(1, i_startidx_in)) + CALL TAG_TEST("TEST_get_indices_v_end_1") + CALL ASSERT_EQUAL(i_endidx_out, nproma) + + i_startidx_out = 0 + i_endidx_out = 0 + + ! CASE: II -> i_blk == i_endblk + i_blk = 40 + CALL get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) + + CALL TAG_TEST("TEST_get_indices_c_start_2") + CALL ASSERT_EQUAL(i_startidx_out, 1) + CALL TAG_TEST("TEST_get_indices_c_end_2") + CALL ASSERT_EQUAL(i_endidx_out, i_endidx_in) + + i_startidx_out = 0 + i_endidx_out = 0 + + CALL get_indices_e_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) + + CALL TAG_TEST("TEST_get_indices_e_start_2") + CALL ASSERT_EQUAL(i_startidx_out, 1) + CALL TAG_TEST("TEST_get_indices_e_end_2") + CALL ASSERT_EQUAL(i_endidx_out, i_endidx_in) + + i_startidx_out = 0 + i_endidx_out = 0 + + CALL get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) + + CALL TAG_TEST("TEST_get_indices_v_start_2") + CALL ASSERT_EQUAL(i_startidx_out, 1) + CALL TAG_TEST("TEST_get_indices_v_end_2") + CALL ASSERT_EQUAL(i_endidx_out, i_endidx_in) + + i_startidx_out = 0 + i_endidx_out = 0 + + ! CASE: III -> Every other cases + i_blk = 20 + CALL get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) + + CALL TAG_TEST("TEST_get_indices_c_start_3") + CALL ASSERT_EQUAL(i_startidx_out, 1) + CALL TAG_TEST("TEST_get_indices_c_end_3") + CALL ASSERT_EQUAL(i_endidx_out, nproma) + + i_startidx_out = 0 + i_endidx_out = 0 + + CALL get_indices_e_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) + + CALL TAG_TEST("TEST_get_indices_e_start_3") + CALL ASSERT_EQUAL(i_startidx_out, 1) + CALL TAG_TEST("TEST_get_indices_e_end_3") + CALL ASSERT_EQUAL(i_endidx_out, nproma) + + i_startidx_out = 0 + i_endidx_out = 0 + + CALL get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) + + CALL TAG_TEST("TEST_get_indices_v_start_3") + CALL ASSERT_EQUAL(i_startidx_out, 1) + CALL TAG_TEST("TEST_get_indices_v_end_3") + CALL ASSERT_EQUAL(i_endidx_out, nproma) + + END SUBROUTINE TEST_get_indices_lib + +END MODULE TEST_mo_lib_loopindices -- GitLab From fe47826cf3b0a08452f813c6707b4f9b95cd21d7 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 31 Dec 2024 08:53:01 +0100 Subject: [PATCH 02/50] added unittests for mo_math_utilities, partially done --- test/fortran/test_math_utilities.f90 | 270 +++++++++++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100644 test/fortran/test_math_utilities.f90 diff --git a/test/fortran/test_math_utilities.f90 b/test/fortran/test_math_utilities.f90 new file mode 100644 index 0000000..25064f5 --- /dev/null +++ b/test/fortran/test_math_utilities.f90 @@ -0,0 +1,270 @@ +! ICON +! +! --------------------------------------------------------------- +! Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +! Contact information: icon-model.org +! +! See AUTHORS.TXT for a list of authors +! See LICENSES/ for license information +! SPDX-License-Identifier: BSD-3-Clause +! --------------------------------------------------------------- + +MODULE TEST_mo_math_utilities + + USE FORTUTF + USE mo_math_types, ONLY: t_cartesian_coordinates, t_geographical_coordinates, & + & t_line, t_tangent_vectors + USE mo_math_constants, ONLY: pi, pi_2, pi_4 + USE mo_lib_grid_geometry_info + ! USE mo_physical_constants, ONLY: earth_radius + USE, INTRINSIC :: ISO_FORTRAN_ENV, ONLY: wp => real64 + + IMPLICIT NONE + + PRIVATE + + PUBLIC :: TEST_cc2gc, TEST_gc2cc, TEST_cc2tv, TEST_gvec2cvec, TEST_cvec2gvec, TEST_tdma_solver_vec + REAL(wp), PARAMETER :: earth_radius = 6.371229e6_wp !! [m] average radius + +CONTAINS + + SUBROUTINE TEST_cc2gc + + USE mo_math_utilities, ONLY: cc2gc + + TYPE(t_cartesian_coordinates) :: coord + TYPE(t_grid_geometry_info) :: geometry_info + TYPE(t_geographical_coordinates) :: pos + + REAL(wp) :: lon_ref, lat_ref + + coord%x(1) = 10.0_wp + coord%x(2) = 10.0_wp + coord%x(3) = 5.0_wp + + geometry_info%geometry_type = sphere_geometry + + pos = cc2gc(coord, geometry_info) + + lon_ref = 0.78539816339744828_wp + lat_ref = 0.33983690945412193_wp + + CALL TAG_TEST("TEST_cc2gc_sphere_lon") + CALL ASSERT_EQUAL(pos%lon, lon_ref) + CALL TAG_TEST("TEST_cc2gc_sphere_lat") + CALL ASSERT_EQUAL(pos%lat, lat_ref) + + geometry_info%geometry_type = planar_torus_geometry + + geometry_info%domain_length = 2.0_wp*pi*earth_radius + geometry_info%domain_height = 2.0_wp*pi*earth_radius + + coord%x(1) = 1000000.0_wp + coord%x(2) = 100.0_wp + + pos = cc2gc(coord, geometry_info) + + lon_ref = 0.15695558894524117_wp + lat_ref = -0.17453205322393880_wp + + CALL TAG_TEST("TEST_cc2gc_torus_lon") + CALL ASSERT_EQUAL(pos%lon, lon_ref) + CALL TAG_TEST("TEST_cc2gc_torus_lat") + CALL ASSERT_EQUAL(pos%lat, lat_ref) + + END SUBROUTINE TEST_cc2gc + + SUBROUTINE TEST_gc2cc + + USE mo_math_utilities, ONLY: gc2cc + + TYPE(t_cartesian_coordinates) :: coord, coord_ref + TYPE(t_grid_geometry_info) :: geometry_info + TYPE(t_geographical_coordinates) :: pos + + REAL(wp) :: lon_ref, lat_ref, tol + INTEGER :: i + CHARACTER(LEN=32) :: tag + + tol = 1d-15 + pos%lon = 0.78_wp + pos%lat = 0.34_wp + + geometry_info%geometry_type = sphere_geometry + + coord = gc2cc(pos, geometry_info) + + coord_ref%x(1) = 0.6702170547483377_wp + coord_ref%x(2) = 0.6630199536212522_wp + coord_ref%x(3) = 0.3334870921408144_wp + + DO i = 1, SIZE(coord%x) +! write(*,"(i4,a,f24.16)") i, ' coord%x(i): ', coord%x(i) + WRITE (tag, '(a,i1)') "TEST_gc2cc_sphere_", i + CALL TAG_TEST(tag) + CALL ASSERT_ALMOST_EQUAL(coord%x(i), coord_ref%x(i), tol) + END DO + + geometry_info%geometry_type = planar_torus_geometry + + geometry_info%domain_length = 2.0_wp*pi*earth_radius + geometry_info%domain_height = 2.0_wp*pi*earth_radius + + pos%lon = 0.15_wp + pos%lat = -0.17_wp + + coord = gc2cc(pos, geometry_info) + + coord_ref%x(1) = 955684.3499999998603016_wp + coord_ref%x(2) = 519845.4807382422150113_wp + coord_ref%x(3) = 0.0_wp + + DO i = 1, SIZE(coord%x) +! write(*,"(i4,a,f24.16)") i, ' coord%x(i): ', coord%x(i) + WRITE (tag, '(a,i1)') "TEST_gc2cc_torus_", i + CALL TAG_TEST(tag) + CALL ASSERT_EQUAL(coord%x(i), coord_ref%x(i)) + END DO + + END SUBROUTINE TEST_gc2cc + + SUBROUTINE TEST_cc2tv + USE mo_math_utilities, ONLY: cc2tv + + TYPE(t_cartesian_coordinates) :: coord + TYPE(t_geographical_coordinates) :: pos + TYPE(t_tangent_vectors):: tt + REAL(wp) :: v1_ref, v2_ref + + pos%lon = pi_2 + pos%lat = pi_4 + coord%x(1) = 10.0_wp + coord%x(2) = 15.0_wp + coord%x(3) = 5.0_wp + + tt = cc2tv(coord, pos) + + v1_ref = -9.9999999999999982_wp + v2_ref = -7.0710678118654737_wp + + CALL TAG_TEST("TEST_cc2tv_v1") + CALL ASSERT_EQUAL(tt%v1, v1_ref) + CALL TAG_TEST("TEST_cc2tv_v2") + CALL ASSERT_EQUAL(tt%v2, v2_ref) + + END SUBROUTINE TEST_cc2tv + + SUBROUTINE TEST_gvec2cvec + USE mo_math_utilities, ONLY: gvec2cvec + + REAL(wp) :: p_gu, p_gv ! zonal and meridional vec. component + REAL(wp) :: p_long, p_lat ! geo. coord. of data point + TYPE(t_grid_geometry_info) :: geometry_info + + REAL(wp) :: p_cu, p_cv, p_cw ! Cart. vector + REAL(wp) :: p_cu_ref, p_cv_ref, p_cw_ref ! Cart. vector ref + + geometry_info%geometry_type = sphere_geometry + + p_gu = 10.0_wp + p_gv = 5.0_wp + + p_long = pi_2 + p_lat = pi_4 + + CALL gvec2cvec(p_gu, p_gv, p_long, p_lat, p_cu, p_cv, p_cw, geometry_info) + + p_cu_ref = -10.0_wp + p_cv_ref = -3.5355339059327369_wp + p_cw_ref = 3.5355339059327378_wp + + CALL TAG_TEST("TEST_gvec2cvec_sphere_cu") + CALL ASSERT_EQUAL(p_cu, p_cu_ref) + CALL TAG_TEST("TEST_gvec2cvec_sphere_cv") + CALL ASSERT_EQUAL(p_cv, p_cv_ref) + CALL TAG_TEST("TEST_gvec2cvec_sphere_cw") + CALL ASSERT_EQUAL(p_cw, p_cw_ref) + + geometry_info%geometry_type = planar_torus_geometry + + CALL gvec2cvec(p_gu, p_gv, p_long, p_lat, p_cu, p_cv, p_cw, geometry_info) + + CALL TAG_TEST("TEST_gvec2cvec_torus_cu") + CALL ASSERT_EQUAL(p_cu, p_gu) + CALL TAG_TEST("TEST_gvec2cvec_torus_cv") + CALL ASSERT_EQUAL(p_cv, p_gv) + CALL TAG_TEST("TEST_gvec2cvec_torus_cw") + CALL ASSERT_EQUAL(p_cw, 0.0_wp) + + END SUBROUTINE TEST_gvec2cvec + + SUBROUTINE TEST_cvec2gvec + USE mo_math_utilities, ONLY: cvec2gvec + + REAL(wp) :: p_cu, p_cv, p_cw ! Cart. vector + REAL(wp) :: p_long, p_lat ! geo. coord. of data point + TYPE(t_grid_geometry_info) :: geometry_info + + REAL(wp) :: p_gu, p_gv ! zonal and meridional vec. comp. + REAL(wp) :: p_gu_ref, p_gv_ref + + geometry_info%geometry_type = sphere_geometry + + p_cu = -10.0_wp + p_cv = -3.0_wp + p_cw = 3.0_wp + + p_long = pi_2 + p_lat = pi_4 + + CALL cvec2gvec(p_cu, p_cv, p_cw, p_long, p_lat, p_gu, p_gv, geometry_info) + +! write(*,"(a,f24.16)") ' p_gu: ', p_gu +! write(*,"(a,f24.16)") ' p_gv: ', p_gv + + p_gu_ref = 10.0_wp + p_gv_ref = 4.2426406871192857_wp + + CALL TAG_TEST("TEST_cvec2gvec_sphere_gu") + CALL ASSERT_EQUAL(p_gu, p_gu_ref) + CALL TAG_TEST("TEST_cvec2gvec_sphere_gv") + CALL ASSERT_EQUAL(p_gv, p_gv_ref) + + geometry_info%geometry_type = planar_torus_geometry + + CALL cvec2gvec(p_cu, p_cv, p_cw, p_long, p_lat, p_gu, p_gv, geometry_info) + + CALL TAG_TEST("TEST_cvec2gvec_torus_gu") + CALL ASSERT_EQUAL(p_gu, p_cu) + CALL TAG_TEST("TEST_cvec2gvec_torus_gv") + CALL ASSERT_EQUAL(p_gv, p_cv) + + END SUBROUTINE TEST_cvec2gvec + + SUBROUTINE TEST_tdma_solver_vec + + USE mo_math_utilities, ONLY: tdma_solver_vec + + INTEGER, PARAMETER :: n = 10 + REAL(wp) :: a(n, n), b(n, n), c(n, n), d(n, n), x(n, n) + INTEGER :: i, j + REAL(wp) :: sum, sum_ref + DO i = 1, n + DO j = 1, n + a(i, j) = 1.0_wp + b(i, j) = 2.0_wp + c(i, j) = 1.0_wp + d(i, j) = 1.0_wp + END DO + END DO + CALL tdma_solver_vec(a, b, c, d, 1, n, 1, n, x) + sum = 0.0_wp + DO i = 1, n + sum = sum + x(i, 1) + END DO + sum_ref = 4.5454545454545467_wp + CALL TAG_TEST("TEST_tdma_solver_vec") + CALL ASSERT_EQUAL(sum, sum_ref) + END SUBROUTINE TEST_tdma_solver_vec + +END MODULE TEST_mo_math_utilities -- GitLab From 147e4b40b318a58abb41eccc3a676a35d3b76ea3 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 31 Dec 2024 08:54:28 +0100 Subject: [PATCH 03/50] updated the gitignore file --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 37de517..18c1a34 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,5 @@ iconmath_Tests # Test stage files: /**/Testing/* run_tests.f90 +iconmath_Tests +run_tests.f90 -- GitLab From 86cd7136a1664dde92b4ebc1e0c19ffbbd439140 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 21 Feb 2025 10:04:20 +0100 Subject: [PATCH 04/50] fixed a style-check issue --- test/fortran/test_math_utilities.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/fortran/test_math_utilities.f90 b/test/fortran/test_math_utilities.f90 index 25064f5..70fd9ab 100644 --- a/test/fortran/test_math_utilities.f90 +++ b/test/fortran/test_math_utilities.f90 @@ -24,7 +24,7 @@ MODULE TEST_mo_math_utilities PRIVATE PUBLIC :: TEST_cc2gc, TEST_gc2cc, TEST_cc2tv, TEST_gvec2cvec, TEST_cvec2gvec, TEST_tdma_solver_vec - REAL(wp), PARAMETER :: earth_radius = 6.371229e6_wp !! [m] average radius + REAL(wp), PARAMETER :: earth_radius = 6.371229e6_wp !! [m] average radius CONTAINS -- GitLab From 8539e26d2e03bb93b25b4f682c60bc0624c742c4 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 31 Dec 2024 08:54:28 +0100 Subject: [PATCH 05/50] updated the gitignore file --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 18c1a34..23535bf 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,5 @@ iconmath_Tests # Test stage files: /**/Testing/* -run_tests.f90 iconmath_Tests run_tests.f90 -- GitLab From 32e75f2317260a1bcb074171dc58e867e222ab41 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 31 Dec 2024 14:57:31 +0100 Subject: [PATCH 06/50] changed the extension of mo_lib_loopindices to F90 --- src/support/CMakeLists.txt | 2 +- src/support/{mo_lib_loopindices.f90 => mo_lib_loopindices.F90} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/support/{mo_lib_loopindices.f90 => mo_lib_loopindices.F90} (100%) diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index 8e5fcc4..0bce304 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -13,7 +13,7 @@ add_library( iconmath-support mo_gridman_constants.f90 mo_lib_grid_geometry_info.f90 - mo_lib_loopindices.f90 + mo_lib_loopindices.F90 mo_math_constants.f90 mo_math_types.f90 mo_math_utilities.F90 diff --git a/src/support/mo_lib_loopindices.f90 b/src/support/mo_lib_loopindices.F90 similarity index 100% rename from src/support/mo_lib_loopindices.f90 rename to src/support/mo_lib_loopindices.F90 -- GitLab From e9e1506bbb05fccc25ce47226b688c6ddaf6c44c Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 31 Dec 2024 14:58:40 +0100 Subject: [PATCH 07/50] added the cpp version of mo_lib_loopindices and compiled the code made changes to debug the last version --- CMakeLists.txt | 2 +- src/support/CMakeLists.txt | 12 +++++++- src/support/mo_lib_loopindices.cpp | 46 ++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 src/support/mo_lib_loopindices.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index daafc5d..c40cd40 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,7 +14,7 @@ cmake_minimum_required(VERSION 3.18) project( iconmath VERSION 1.0.0 - LANGUAGES Fortran) + LANGUAGES Fortran CXX) option(BUILD_SHARED_LIBS "Build shared libraries" ON) option(BUILD_TESTING "Build tests" ON) diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index 0bce304..6d2fd78 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -13,6 +13,7 @@ add_library( iconmath-support mo_gridman_constants.f90 mo_lib_grid_geometry_info.f90 + mo_lib_loopindices.cpp mo_lib_loopindices.F90 mo_math_constants.f90 mo_math_types.f90 @@ -57,7 +58,16 @@ target_include_directories( # Path to the Fortran modules: $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:${Fortran_MODULE_DIRECTORY}>> $<INSTALL_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:$<INSTALL_PREFIX>/${CMAKE_INSTALL_INCLUDEDIR}>> -) + INTERFACE + # Path to the internal C/C++ headers (for testing): Requires CMake 3.15+ for + # multiple compile languages + # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html + $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_SOURCE_DIR}>> + PRIVATE + # Path to config.h (for C and C++ only): Requires CMake 3.15+ for multiple + # compile languages + # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html + $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_BINARY_DIR}>>) target_link_libraries(iconmath-support PUBLIC fortran-support::fortran-support) diff --git a/src/support/mo_lib_loopindices.cpp b/src/support/mo_lib_loopindices.cpp new file mode 100644 index 0000000..9810427 --- /dev/null +++ b/src/support/mo_lib_loopindices.cpp @@ -0,0 +1,46 @@ +#include <algorithm> // For std::max + +extern "C" { + // get_indices_c_lib function + void get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out) { + if (i_blk == i_startblk) { + i_startidx_out = std::max(1, i_startidx_in); + i_endidx_out = nproma; + if (i_blk == i_endblk) { + i_endidx_out = i_endidx_in; + } + } else if (i_blk == i_endblk) { + i_startidx_out = 1; + i_endidx_out = i_endidx_in; + } else { + i_startidx_out = 1; + i_endidx_out = nproma; + } + } + + // get_indices_e_lib function + void get_indices_e_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out) { + i_startidx_out = (i_blk != i_startblk) ? 1 : std::max(1, i_startidx_in); + i_endidx_out = (i_blk != i_endblk) ? nproma : i_endidx_in; + } + + // get_indices_v_lib function + void get_indices_v_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out) { + if (i_blk == i_startblk) { + i_startidx_out = i_startidx_in; + i_endidx_out = nproma; + if (i_blk == i_endblk) { + i_endidx_out = i_endidx_in; + } + } else if (i_blk == i_endblk) { + i_startidx_out = 1; + i_endidx_out = i_endidx_in; + } else { + i_startidx_out = 1; + i_endidx_out = nproma; + } + } +} -- GitLab From ad6259025c880d4a9f1e4009c2942fc1882065de Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 31 Dec 2024 15:07:21 +0100 Subject: [PATCH 08/50] enabled the use of cpp bindings for mo_lib_loopindices --- CMakeLists.txt | 1 + src/support/CMakeLists.txt | 4 ++++ src/support/mo_lib_loopindices.F90 | 34 +++++++++++++++++++++++++++++- 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c40cd40..2f32fcf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,6 +23,7 @@ option(BUILD_ICONMATH_HORIZONTAL "Build horizontal library" ON) option(IM_ENABLE_MIXED_PRECISION "Enable mixed precision" OFF) option(IM_ENABLE_LOOP_EXCHANGE "Enable loop exchange" OFF) +option(IM_USE_CPP_BINDINGS "Use C++ bindings" OFF) option(IM_ENABLE_DIM_SWAP "Enable dimension swap" OFF) option(IM_ENABLE_OPENACC "Enable OpenACC support" OFF) option(IM_ENABLE_OPENMP "Enable OpenMP support" OFF) diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index 6d2fd78..c0fc287 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -41,6 +41,10 @@ if(IM_ENABLE_DIM_SWAP) target_compile_definitions(iconmath-support PRIVATE __SWAPDIM) endif() +if(IM_USE_CPP_BINDINGS) + target_compile_definitions(iconmath-support PRIVATE __USE_CPP_BINDINGS) +endif() + if(IM_ENABLE_OPENACC) # If _OPENACC is defined, assume that the required compiler flags are already # provided, e.g. in CMAKE_Fortran_FLAGS: diff --git a/src/support/mo_lib_loopindices.F90 b/src/support/mo_lib_loopindices.F90 index fe6c9b9..3ac80dd 100644 --- a/src/support/mo_lib_loopindices.F90 +++ b/src/support/mo_lib_loopindices.F90 @@ -16,12 +16,18 @@ MODULE mo_lib_loopindices +#ifdef __USE_CPP_BINDINGS + USE, INTRINSIC :: ISO_C_BINDING +#endif + IMPLICIT NONE PRIVATE PUBLIC :: get_indices_c_lib, get_indices_e_lib, get_indices_v_lib +#ifndef __USE_CPP_BINDINGS + CONTAINS !------------------------------------------------------------------------- @@ -121,5 +127,31 @@ CONTAINS END SUBROUTINE get_indices_v_lib -END MODULE mo_lib_loopindices +#else + + INTERFACE + SUBROUTINE get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_c_lib") + IMPORT :: C_INT + INTEGER(C_INT), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk + INTEGER(C_INT) :: i_startidx_out, i_endidx_out + END SUBROUTINE get_indices_c_lib + + SUBROUTINE get_indices_e_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_e_lib") + IMPORT :: C_INT + INTEGER(C_INT), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk + INTEGER(C_INT) :: i_startidx_out, i_endidx_out + END SUBROUTINE get_indices_e_lib + + SUBROUTINE get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & + i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_v_lib") + IMPORT :: C_INT + INTEGER(C_INT), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk + INTEGER(C_INT) :: i_startidx_out, i_endidx_out + END SUBROUTINE get_indices_v_lib + END INTERFACE + +#endif +END MODULE mo_lib_loopindices -- GitLab From a7402469ee13ce7266fa397c6e31c5e48e7130c1 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Thu, 2 Jan 2025 22:24:47 +0100 Subject: [PATCH 09/50] added the cpp version of tdma_solver_vec made it compile --- CMakeLists.txt | 2 + src/support/CMakeLists.txt | 1 + src/support/mo_math_utilities.F90 | 164 +++++++++++++++++------------- src/support/mo_math_utilities.cpp | 77 ++++++++++++++ 4 files changed, 172 insertions(+), 72 deletions(-) create mode 100644 src/support/mo_math_utilities.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 2f32fcf..8fb4acf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,8 @@ project( VERSION 1.0.0 LANGUAGES Fortran CXX) +set(CMAKE_CXX_STANDARD 17) + option(BUILD_SHARED_LIBS "Build shared libraries" ON) option(BUILD_TESTING "Build tests" ON) option(BUILD_ICONMATH_INTERPOLATION "Build interpolation library" ON) diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index c0fc287..35e1c71 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -17,6 +17,7 @@ add_library( mo_lib_loopindices.F90 mo_math_constants.f90 mo_math_types.f90 + mo_math_utilities.cpp mo_math_utilities.F90 mo_random_number_generators.F90) diff --git a/src/support/mo_math_utilities.F90 b/src/support/mo_math_utilities.F90 index fa7ea20..3461cec 100644 --- a/src/support/mo_math_utilities.F90 +++ b/src/support/mo_math_utilities.F90 @@ -22,6 +22,7 @@ ! #endif MODULE mo_math_utilities + USE, INTRINSIC :: ISO_C_BINDING USE mo_iconlib_kind, ONLY: wp, dp, sp USE mo_math_constants, ONLY: pi, pi_2, dbl_eps USE mo_gridman_constants, ONLY: SUCCESS, TORUS_MAX_LAT @@ -160,7 +161,98 @@ MODULE mo_math_utilities CHARACTER(LEN=*), PARAMETER :: modname = 'mo_math_utilities' + !------------------------------------------------------------------------- + !> + !! TDMA tridiagonal matrix solver for a_i*x_(i-1) + b_i*x_i + c_i*x_(i+1) = d_i + !! + !! a - sub-diagonal (means it is the diagonal below the main diagonal) + !! b - the main diagonal + !! c - sup-diagonal (means it is the diagonal above the main diagonal) + !! d - right part + !! varout - the answer (identical to x in description above) + !! slev - start level (top) + !! elev - end level (bottom) + +! Preprocessor directive to conditionally include the tdma_solver_vec implementation +#ifndef __USE_CPP_BINDINGS + + CONTAINS + + SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, varout, opt_acc_queue) + INTEGER, INTENT(IN) :: slev, elev + INTEGER, INTENT(IN) :: startidx, endidx + REAL(wp), INTENT(IN) :: a(:, :), b(:, :), c(:, :), d(:, :) + REAL(wp), INTENT(OUT) :: varout(:, :) + INTEGER, OPTIONAL, INTENT(IN) :: opt_acc_queue + + ! + ! local + REAL(wp):: m, c_p(SIZE(a, 1), SIZE(a, 2)), d_p(SIZE(a, 1), SIZE(a, 2)) + INTEGER :: i + INTEGER :: jc + INTEGER :: acc_queue + + IF (PRESENT(opt_acc_queue)) THEN + acc_queue = opt_acc_queue + ELSE + acc_queue = 1 + END IF + + ! initialize c-prime and d-prime + !$ACC PARALLEL DEFAULT(PRESENT) CREATE(c_p, d_p) ASYNC(acc_queue) + !$ACC LOOP GANG(STATIC: 1) VECTOR + DO jc = startidx, endidx + c_p(jc, slev) = c(jc, slev)/b(jc, slev) + d_p(jc, slev) = d(jc, slev)/b(jc, slev) + END DO + ! solve for vectors c-prime and d-prime + !$ACC LOOP SEQ +!NEC$ outerloop_unroll(4) + DO i = slev + 1, elev + !$ACC LOOP GANG(STATIC: 1) VECTOR PRIVATE(m) + DO jc = startidx, endidx + m = 1._wp/(b(jc, i) - c_p(jc, i - 1)*a(jc, i)) + c_p(jc, i) = c(jc, i)*m + d_p(jc, i) = (d(jc, i) - d_p(jc, i - 1)*a(jc, i))*m + END DO + END DO + ! initialize varout + !$ACC LOOP GANG(STATIC: 1) VECTOR + DO jc = startidx, endidx + varout(jc, elev) = d_p(jc, elev) + END DO + ! solve for varout from the vectors c-prime and d-prime + !$ACC LOOP SEQ +!NEC$ outerloop_unroll(4) + DO i = elev - 1, slev, -1 + !$ACC LOOP GANG(STATIC: 1) VECTOR + DO jc = startidx, endidx + varout(jc, i) = d_p(jc, i) - c_p(jc, i)*varout(jc, i + 1) + END DO + END DO + !$ACC END PARALLEL + + IF (.NOT. PRESENT(opt_acc_queue)) THEN + !$ACC WAIT(acc_queue) + END IF + + END SUBROUTINE tdma_solver_vec + +#else + + ! C++ binding for tdma_solver_vec + INTERFACE + SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, varout, opt_acc_queue) BIND(C, NAME="tdma_solver_vec") + IMPORT :: C_DOUBLE, C_INT + REAL(C_DOUBLE), INTENT(IN) :: a(*), b(*), c(*), d(*) + INTEGER(C_INT), VALUE :: slev, elev, startidx, endidx + REAL(C_DOUBLE), INTENT(OUT) :: varout(*) + INTEGER(C_INT), OPTIONAL :: opt_acc_queue + END SUBROUTINE tdma_solver_vec + END INTERFACE + CONTAINS +#endif !------------------------------------------------------------------------- ! Variant for double-precision (or working-precision=dp) lon+lat in ICON @@ -2019,78 +2111,6 @@ CONTAINS END SUBROUTINE tdma_solver - !------------------------------------------------------------------------- - !> - !! TDMA tridiagonal matrix solver for a_i*x_(i-1) + b_i*x_i + c_i*x_(i+1) = d_i - !! - !! a - sub-diagonal (means it is the diagonal below the main diagonal) - !! b - the main diagonal - !! c - sup-diagonal (means it is the diagonal above the main diagonal) - !! d - right part - !! varout - the answer (identical to x in description above) - !! slev - start level (top) - !! elev - end level (bottom) - SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, varout, opt_acc_queue) - INTEGER, INTENT(IN) :: slev, elev - INTEGER, INTENT(IN) :: startidx, endidx - REAL(wp), INTENT(IN) :: a(:, :), b(:, :), c(:, :), d(:, :) - REAL(wp), INTENT(OUT) :: varout(:, :) - INTEGER, OPTIONAL, INTENT(IN) :: opt_acc_queue - - ! - ! local - REAL(wp):: m, c_p(SIZE(a, 1), SIZE(a, 2)), d_p(SIZE(a, 1), SIZE(a, 2)) - INTEGER :: i - INTEGER :: jc - INTEGER :: acc_queue - - IF (PRESENT(opt_acc_queue)) THEN - acc_queue = opt_acc_queue - ELSE - acc_queue = 1 - END IF - - ! initialize c-prime and d-prime - !$ACC PARALLEL DEFAULT(PRESENT) CREATE(c_p, d_p) ASYNC(acc_queue) - !$ACC LOOP GANG(STATIC: 1) VECTOR - DO jc = startidx, endidx - c_p(jc, slev) = c(jc, slev)/b(jc, slev) - d_p(jc, slev) = d(jc, slev)/b(jc, slev) - END DO - ! solve for vectors c-prime and d-prime - !$ACC LOOP SEQ -!NEC$ outerloop_unroll(4) - DO i = slev + 1, elev - !$ACC LOOP GANG(STATIC: 1) VECTOR PRIVATE(m) - DO jc = startidx, endidx - m = 1._wp/(b(jc, i) - c_p(jc, i - 1)*a(jc, i)) - c_p(jc, i) = c(jc, i)*m - d_p(jc, i) = (d(jc, i) - d_p(jc, i - 1)*a(jc, i))*m - END DO - END DO - ! initialize varout - !$ACC LOOP GANG(STATIC: 1) VECTOR - DO jc = startidx, endidx - varout(jc, elev) = d_p(jc, elev) - END DO - ! solve for varout from the vectors c-prime and d-prime - !$ACC LOOP SEQ -!NEC$ outerloop_unroll(4) - DO i = elev - 1, slev, -1 - !$ACC LOOP GANG(STATIC: 1) VECTOR - DO jc = startidx, endidx - varout(jc, i) = d_p(jc, i) - c_p(jc, i)*varout(jc, i + 1) - END DO - END DO - !$ACC END PARALLEL - - IF (.NOT. PRESENT(opt_acc_queue)) THEN - !$ACC WAIT(acc_queue) - END IF - - END SUBROUTINE tdma_solver_vec - !------------------------------------------------------------------------- - !------------------------------------------------------------------------- ! !> Helper functions for computing the vertical layer structure diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp new file mode 100644 index 0000000..a8ccce4 --- /dev/null +++ b/src/support/mo_math_utilities.cpp @@ -0,0 +1,77 @@ +#include <vector> +#include <iostream> +#include <chrono> // For timing + +extern "C" { + +void tdma_solver_vec(double *a, double *b, double *c, double *d, + int slev, int elev, int startidx, int endidx, + double* varout, int opt_acc_queue = -1) { + + int acc_queue = (opt_acc_queue == -1) ? 1 : opt_acc_queue; // Use 1 as the default if opt_acc_queue is not provided + + // Determine array sizes based on startidx and endidx + int nrows = endidx - startidx; + int ncols = elev - slev; + + // Temporary arrays for c-prime and d-prime + std::vector<double> cp(nrows * ncols, 0.0); + std::vector<double> dp(nrows * ncols, 0.0); + + // Helper function to access 2D arrays stored as 1D + auto idx = [&](int row, int col) { return col * nrows + row; }; + + // Start timing + auto start_time = std::chrono::high_resolution_clock::now(); + + // OpenACC Parallel Region + #pragma acc parallel default(present) create(cp[:nrows*ncols], dp[:nrows*ncols]) async(acc_queue) + { + // Initialize c-prime and d-prime + #pragma acc loop gang(static: 1) vector + for (int jc = startidx; jc < endidx; ++jc) { + cp[idx(jc, slev)] = c[idx(jc, slev)] / b[idx(jc, slev)]; + dp[idx(jc, slev)] = d[idx(jc, slev)] / b[idx(jc, slev)]; + } + + // Solve for vectors c-prime and d-prime + #pragma acc loop seq + for (int i = slev + 1; i < elev; ++i) { + #pragma acc loop gang(static: 1) vector + for (int jc = startidx; jc < endidx; ++jc) { + double m = 1.0 / (b[idx(jc, i)] - cp[idx(jc, i - 1)] * a[idx(jc, i)]); + cp[idx(jc, i)] = c[idx(jc, i)] * m; + dp[idx(jc, i)] = (d[idx(jc, i)] - dp[idx(jc, i - 1)] * a[idx(jc, i)]) * m; + } + } + + // Initialize varout + #pragma acc loop gang(static: 1) vector + for (int jc = startidx; jc < endidx; ++jc) { + varout[idx(jc, elev-1)] = dp[idx(jc, elev-1)]; + } + + // Solve for varout from the vectors c-prime and d-prime + #pragma acc loop seq + for (int i = elev - 2; i >= slev; --i) { + #pragma acc loop gang(static: 1) vector + for (int jc = startidx; jc < endidx; ++jc) { + varout[idx(jc, i)] = dp[idx(jc, i)] - cp[idx(jc, i)] * varout[idx(jc, i + 1)]; + } + } + } + + printf("tdma_solver_vec: completed using C++\n"); + + // Wait for OpenACC asynchronous operations to complete if acc_queue is not optional + if (opt_acc_queue == -1) { + #pragma acc wait(acc_queue) + } + + // End timing + auto end_time = std::chrono::high_resolution_clock::now(); + std::chrono::duration<double> elapsed_time = end_time - start_time; + + std::cout << "Elapsed time for tdma_solver_vec (C++): " << elapsed_time.count() << " seconds" << std::endl; +} +} -- GitLab From c043b6df79b7e099b99b0681ecb20b1d717e665d Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Thu, 2 Jan 2025 22:29:20 +0100 Subject: [PATCH 10/50] added the test for the cpp binding of tdma_solver_vec fixed a bug in testing --- src/support/CMakeLists.txt | 2 +- test/fortran/test_math_utilities.f90 | 29 ++++++++++++++++++++++++---- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index 35e1c71..7af714e 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -43,7 +43,7 @@ if(IM_ENABLE_DIM_SWAP) endif() if(IM_USE_CPP_BINDINGS) - target_compile_definitions(iconmath-support PRIVATE __USE_CPP_BINDINGS) + target_compile_definitions(iconmath-support PUBLIC __USE_CPP_BINDINGS) endif() if(IM_ENABLE_OPENACC) diff --git a/test/fortran/test_math_utilities.f90 b/test/fortran/test_math_utilities.f90 index 70fd9ab..8dcf5b3 100644 --- a/test/fortran/test_math_utilities.f90 +++ b/test/fortran/test_math_utilities.f90 @@ -248,7 +248,9 @@ CONTAINS INTEGER, PARAMETER :: n = 10 REAL(wp) :: a(n, n), b(n, n), c(n, n), d(n, n), x(n, n) INTEGER :: i, j - REAL(wp) :: sum, sum_ref + REAL(wp) :: sum, sum_ref, tol + REAL(wp) :: start_time, end_time, elapsed_time + DO i = 1, n DO j = 1, n a(i, j) = 1.0_wp @@ -257,14 +259,33 @@ CONTAINS d(i, j) = 1.0_wp END DO END DO + + CALL CPU_TIME(start_time) +#ifndef __USE_CPP_BINDINGS CALL tdma_solver_vec(a, b, c, d, 1, n, 1, n, x) +#else + CALL tdma_solver_vec(a, b, c, d, 0, n, 0, n, x, -1) +#endif + CALL CPU_TIME(end_time) + + ! Compute elapsed time + elapsed_time = end_time - start_time + + ! Output timing result + write(*,*) "Elapsed time for tdma_solver_vec: ", elapsed_time, " seconds" + sum = 0.0_wp DO i = 1, n - sum = sum + x(i, 1) + DO j = 1, n + sum = sum + x(i, j) + ! write(*,"(a,f24.16)") ' x(i, 1): ', x(i, 1) + END DO END DO - sum_ref = 4.5454545454545467_wp + sum_ref = 27.2727272727272769_wp + tol = 1d-15 CALL TAG_TEST("TEST_tdma_solver_vec") - CALL ASSERT_EQUAL(sum, sum_ref) + CALL ASSERT_ALMOST_EQUAL(sum, sum_ref, tol) + END SUBROUTINE TEST_tdma_solver_vec END MODULE TEST_mo_math_utilities -- GitLab From 8a55bbb6033537b38d94d82b67c2cf92176c4338 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 3 Jan 2025 09:31:02 +0100 Subject: [PATCH 11/50] changed the way local arrays are defined, it improves the performance fixed a bug --- src/support/mo_math_utilities.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp index a8ccce4..f82cd27 100644 --- a/src/support/mo_math_utilities.cpp +++ b/src/support/mo_math_utilities.cpp @@ -14,9 +14,8 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, int nrows = endidx - startidx; int ncols = elev - slev; - // Temporary arrays for c-prime and d-prime - std::vector<double> cp(nrows * ncols, 0.0); - std::vector<double> dp(nrows * ncols, 0.0); + double* cp = new double[nrows * ncols]; + double* dp = new double[nrows * ncols]; // Helper function to access 2D arrays stored as 1D auto idx = [&](int row, int col) { return col * nrows + row; }; @@ -68,6 +67,9 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, #pragma acc wait(acc_queue) } + // Free memory at the end + delete[] cp; + delete[] dp; // End timing auto end_time = std::chrono::high_resolution_clock::now(); std::chrono::duration<double> elapsed_time = end_time - start_time; -- GitLab From bb9cc2524804f39c4538e447fcce4f9deaf0f999 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 3 Jan 2025 09:34:24 +0100 Subject: [PATCH 12/50] replaced the lambda function for calculating combined index with a macro function fixed a bug --- src/support/mo_math_utilities.cpp | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp index f82cd27..c2b46dc 100644 --- a/src/support/mo_math_utilities.cpp +++ b/src/support/mo_math_utilities.cpp @@ -17,11 +17,7 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, double* cp = new double[nrows * ncols]; double* dp = new double[nrows * ncols]; - // Helper function to access 2D arrays stored as 1D - auto idx = [&](int row, int col) { return col * nrows + row; }; - - // Start timing - auto start_time = std::chrono::high_resolution_clock::now(); + #define IDX(row, col) ((col) * nrows + (row)) // performs better than lambda function // OpenACC Parallel Region #pragma acc parallel default(present) create(cp[:nrows*ncols], dp[:nrows*ncols]) async(acc_queue) @@ -29,8 +25,8 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, // Initialize c-prime and d-prime #pragma acc loop gang(static: 1) vector for (int jc = startidx; jc < endidx; ++jc) { - cp[idx(jc, slev)] = c[idx(jc, slev)] / b[idx(jc, slev)]; - dp[idx(jc, slev)] = d[idx(jc, slev)] / b[idx(jc, slev)]; + cp[IDX(jc, slev)] = c[IDX(jc, slev)] / b[IDX(jc, slev)]; + dp[IDX(jc, slev)] = d[IDX(jc, slev)] / b[IDX(jc, slev)]; } // Solve for vectors c-prime and d-prime @@ -38,16 +34,16 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, for (int i = slev + 1; i < elev; ++i) { #pragma acc loop gang(static: 1) vector for (int jc = startidx; jc < endidx; ++jc) { - double m = 1.0 / (b[idx(jc, i)] - cp[idx(jc, i - 1)] * a[idx(jc, i)]); - cp[idx(jc, i)] = c[idx(jc, i)] * m; - dp[idx(jc, i)] = (d[idx(jc, i)] - dp[idx(jc, i - 1)] * a[idx(jc, i)]) * m; + double m = 1.0 / (b[IDX(jc, i)] - cp[IDX(jc, i - 1)] * a[IDX(jc, i)]); + cp[IDX(jc, i)] = c[IDX(jc, i)] * m; + dp[IDX(jc, i)] = (d[IDX(jc, i)] - dp[IDX(jc, i - 1)] * a[IDX(jc, i)]) * m; } } // Initialize varout #pragma acc loop gang(static: 1) vector for (int jc = startidx; jc < endidx; ++jc) { - varout[idx(jc, elev-1)] = dp[idx(jc, elev-1)]; + varout[IDX(jc, elev-1)] = dp[IDX(jc, elev-1)]; } // Solve for varout from the vectors c-prime and d-prime @@ -55,7 +51,7 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, for (int i = elev - 2; i >= slev; --i) { #pragma acc loop gang(static: 1) vector for (int jc = startidx; jc < endidx; ++jc) { - varout[idx(jc, i)] = dp[idx(jc, i)] - cp[idx(jc, i)] * varout[idx(jc, i + 1)]; + varout[IDX(jc, i)] = dp[IDX(jc, i)] - cp[IDX(jc, i)] * varout[IDX(jc, i + 1)]; } } } @@ -70,6 +66,7 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, // Free memory at the end delete[] cp; delete[] dp; + // End timing auto end_time = std::chrono::high_resolution_clock::now(); std::chrono::duration<double> elapsed_time = end_time - start_time; -- GitLab From d03022c361dfb3fc42f0c9ad0efa5c57d9a6e159 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 3 Jan 2025 09:35:30 +0100 Subject: [PATCH 13/50] change the place of start the timer --- src/support/mo_math_utilities.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp index c2b46dc..45430b0 100644 --- a/src/support/mo_math_utilities.cpp +++ b/src/support/mo_math_utilities.cpp @@ -8,6 +8,9 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, int slev, int elev, int startidx, int endidx, double* varout, int opt_acc_queue = -1) { + // Start timing + auto start_time = std::chrono::high_resolution_clock::now(); + int acc_queue = (opt_acc_queue == -1) ? 1 : opt_acc_queue; // Use 1 as the default if opt_acc_queue is not provided // Determine array sizes based on startidx and endidx -- GitLab From bf0ac7c89ef60cc5482b83d7418410531d48de26 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 3 Jan 2025 11:08:00 +0100 Subject: [PATCH 14/50] added nrows and ncols as arguments to the cpp routine of tdma_solver_vec --- src/support/mo_math_utilities.F90 | 4 ++-- src/support/mo_math_utilities.cpp | 6 +----- test/fortran/test_math_utilities.f90 | 2 +- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/support/mo_math_utilities.F90 b/src/support/mo_math_utilities.F90 index 3461cec..c62af5c 100644 --- a/src/support/mo_math_utilities.F90 +++ b/src/support/mo_math_utilities.F90 @@ -242,10 +242,10 @@ MODULE mo_math_utilities ! C++ binding for tdma_solver_vec INTERFACE - SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, varout, opt_acc_queue) BIND(C, NAME="tdma_solver_vec") + SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) BIND(C, NAME="tdma_solver_vec") IMPORT :: C_DOUBLE, C_INT REAL(C_DOUBLE), INTENT(IN) :: a(*), b(*), c(*), d(*) - INTEGER(C_INT), VALUE :: slev, elev, startidx, endidx + INTEGER(C_INT), VALUE :: slev, elev, startidx, endidx, nrows, ncols REAL(C_DOUBLE), INTENT(OUT) :: varout(*) INTEGER(C_INT), OPTIONAL :: opt_acc_queue END SUBROUTINE tdma_solver_vec diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp index 45430b0..ff94b89 100644 --- a/src/support/mo_math_utilities.cpp +++ b/src/support/mo_math_utilities.cpp @@ -6,17 +6,13 @@ extern "C" { void tdma_solver_vec(double *a, double *b, double *c, double *d, int slev, int elev, int startidx, int endidx, - double* varout, int opt_acc_queue = -1) { + int nrows, int ncols, double *varout, int opt_acc_queue = -1) { // Start timing auto start_time = std::chrono::high_resolution_clock::now(); int acc_queue = (opt_acc_queue == -1) ? 1 : opt_acc_queue; // Use 1 as the default if opt_acc_queue is not provided - // Determine array sizes based on startidx and endidx - int nrows = endidx - startidx; - int ncols = elev - slev; - double* cp = new double[nrows * ncols]; double* dp = new double[nrows * ncols]; diff --git a/test/fortran/test_math_utilities.f90 b/test/fortran/test_math_utilities.f90 index 8dcf5b3..9f95ca7 100644 --- a/test/fortran/test_math_utilities.f90 +++ b/test/fortran/test_math_utilities.f90 @@ -264,7 +264,7 @@ CONTAINS #ifndef __USE_CPP_BINDINGS CALL tdma_solver_vec(a, b, c, d, 1, n, 1, n, x) #else - CALL tdma_solver_vec(a, b, c, d, 0, n, 0, n, x, -1) + CALL tdma_solver_vec(a, b, c, d, 0, n, 0, n, n, n, x, -1) #endif CALL CPU_TIME(end_time) -- GitLab From c0979bbaaf1fa1a6e8fb8bba370a031d870fff3f Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 3 Jan 2025 11:20:50 +0100 Subject: [PATCH 15/50] added an additional test for tdma_solver_vec --- test/fortran/test_math_utilities.f90 | 31 +++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/test/fortran/test_math_utilities.f90 b/test/fortran/test_math_utilities.f90 index 9f95ca7..db8b824 100644 --- a/test/fortran/test_math_utilities.f90 +++ b/test/fortran/test_math_utilities.f90 @@ -253,13 +253,15 @@ CONTAINS DO i = 1, n DO j = 1, n - a(i, j) = 1.0_wp - b(i, j) = 2.0_wp - c(i, j) = 1.0_wp - d(i, j) = 1.0_wp + a(i, j) = 1.0_wp*(i+j) + b(i, j) = 2.0_wp*(i+j) + c(i, j) = 1.0_wp*(i+j) + d(i, j) = 1.0_wp*(i+j) END DO END DO + tol = 1d-15 + CALL CPU_TIME(start_time) #ifndef __USE_CPP_BINDINGS CALL tdma_solver_vec(a, b, c, d, 1, n, 1, n, x) @@ -278,12 +280,27 @@ CONTAINS DO i = 1, n DO j = 1, n sum = sum + x(i, j) - ! write(*,"(a,f24.16)") ' x(i, 1): ', x(i, 1) END DO END DO sum_ref = 27.2727272727272769_wp - tol = 1d-15 - CALL TAG_TEST("TEST_tdma_solver_vec") + CALL TAG_TEST("TEST_tdma_solver_vec_full") + CALL ASSERT_ALMOST_EQUAL(sum, sum_ref, tol) + + x = 0.0_wp +#ifndef __USE_CPP_BINDINGS + CALL tdma_solver_vec(a, b, c, d, 2, n-1, 2, n-1, x) +#else + CALL tdma_solver_vec(a, b, c, d, 1, n-1, 1, n-1, n, n, x, -1) +#endif + sum = 0.0_wp + DO i = 2, n-1 + DO j = 2, n-1 + sum = sum + x(i, j) + END DO + END DO + sum_ref = 17.7777777777777679_wp + + CALL TAG_TEST("TEST_tdma_solver_vec_partial") CALL ASSERT_ALMOST_EQUAL(sum, sum_ref, tol) END SUBROUTINE TEST_tdma_solver_vec -- GitLab From 6aef957598bf6a045db290284db9adf9be86a1d8 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 3 Jan 2025 11:44:43 +0100 Subject: [PATCH 16/50] updated the extension of test_math_utilities --- test/fortran/{test_math_utilities.f90 => test_math_utilities.F90} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/fortran/{test_math_utilities.f90 => test_math_utilities.F90} (100%) diff --git a/test/fortran/test_math_utilities.f90 b/test/fortran/test_math_utilities.F90 similarity index 100% rename from test/fortran/test_math_utilities.f90 rename to test/fortran/test_math_utilities.F90 -- GitLab From 8aa6af717a93922f783f4f5e22dd75bd6e81df49 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 3 Jan 2025 11:58:39 +0100 Subject: [PATCH 17/50] fixed a style formatting issue --- src/support/mo_math_utilities.F90 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/support/mo_math_utilities.F90 b/src/support/mo_math_utilities.F90 index c62af5c..9bcb7a0 100644 --- a/src/support/mo_math_utilities.F90 +++ b/src/support/mo_math_utilities.F90 @@ -242,7 +242,8 @@ MODULE mo_math_utilities ! C++ binding for tdma_solver_vec INTERFACE - SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) BIND(C, NAME="tdma_solver_vec") + SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) & + BIND(C, NAME="tdma_solver_vec") IMPORT :: C_DOUBLE, C_INT REAL(C_DOUBLE), INTENT(IN) :: a(*), b(*), c(*), d(*) INTEGER(C_INT), VALUE :: slev, elev, startidx, endidx, nrows, ncols -- GitLab From 1e78f91c6218796a5e6b7bde6d67e7755e4debd7 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 3 Jan 2025 12:06:42 +0100 Subject: [PATCH 18/50] fixed further style formatting issues --- src/support/mo_lib_loopindices.F90 | 18 +++++++++--------- src/support/mo_math_utilities.F90 | 16 ++++++++-------- test/fortran/test_math_utilities.F90 | 18 +++++++++--------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/src/support/mo_lib_loopindices.F90 b/src/support/mo_lib_loopindices.F90 index 3ac80dd..ce67af7 100644 --- a/src/support/mo_lib_loopindices.F90 +++ b/src/support/mo_lib_loopindices.F90 @@ -132,23 +132,23 @@ CONTAINS INTERFACE SUBROUTINE get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_c_lib") - IMPORT :: C_INT - INTEGER(C_INT), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk - INTEGER(C_INT) :: i_startidx_out, i_endidx_out + IMPORT :: c_int + INTEGER(c_int), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk + INTEGER(c_int) :: i_startidx_out, i_endidx_out END SUBROUTINE get_indices_c_lib SUBROUTINE get_indices_e_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_e_lib") - IMPORT :: C_INT - INTEGER(C_INT), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk - INTEGER(C_INT) :: i_startidx_out, i_endidx_out + IMPORT :: c_int + INTEGER(c_int), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk + INTEGER(c_int) :: i_startidx_out, i_endidx_out END SUBROUTINE get_indices_e_lib SUBROUTINE get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, & i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_v_lib") - IMPORT :: C_INT - INTEGER(C_INT), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk - INTEGER(C_INT) :: i_startidx_out, i_endidx_out + IMPORT :: c_int + INTEGER(c_int), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk + INTEGER(c_int) :: i_startidx_out, i_endidx_out END SUBROUTINE get_indices_v_lib END INTERFACE diff --git a/src/support/mo_math_utilities.F90 b/src/support/mo_math_utilities.F90 index 9bcb7a0..9c2897f 100644 --- a/src/support/mo_math_utilities.F90 +++ b/src/support/mo_math_utilities.F90 @@ -176,7 +176,7 @@ MODULE mo_math_utilities ! Preprocessor directive to conditionally include the tdma_solver_vec implementation #ifndef __USE_CPP_BINDINGS - CONTAINS +CONTAINS SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, varout, opt_acc_queue) INTEGER, INTENT(IN) :: slev, elev @@ -242,13 +242,13 @@ MODULE mo_math_utilities ! C++ binding for tdma_solver_vec INTERFACE - SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) & - BIND(C, NAME="tdma_solver_vec") - IMPORT :: C_DOUBLE, C_INT - REAL(C_DOUBLE), INTENT(IN) :: a(*), b(*), c(*), d(*) - INTEGER(C_INT), VALUE :: slev, elev, startidx, endidx, nrows, ncols - REAL(C_DOUBLE), INTENT(OUT) :: varout(*) - INTEGER(C_INT), OPTIONAL :: opt_acc_queue + SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) & + BIND(C, NAME="tdma_solver_vec") + IMPORT :: c_double, c_int + REAL(c_double), INTENT(IN) :: a(*), b(*), c(*), d(*) + INTEGER(c_int), VALUE :: slev, elev, startidx, endidx, nrows, ncols + REAL(c_double), INTENT(OUT) :: varout(*) + INTEGER(c_int), OPTIONAL :: opt_acc_queue END SUBROUTINE tdma_solver_vec END INTERFACE diff --git a/test/fortran/test_math_utilities.F90 b/test/fortran/test_math_utilities.F90 index db8b824..07dcfb3 100644 --- a/test/fortran/test_math_utilities.F90 +++ b/test/fortran/test_math_utilities.F90 @@ -253,10 +253,10 @@ CONTAINS DO i = 1, n DO j = 1, n - a(i, j) = 1.0_wp*(i+j) - b(i, j) = 2.0_wp*(i+j) - c(i, j) = 1.0_wp*(i+j) - d(i, j) = 1.0_wp*(i+j) + a(i, j) = 1.0_wp*(i + j) + b(i, j) = 2.0_wp*(i + j) + c(i, j) = 1.0_wp*(i + j) + d(i, j) = 1.0_wp*(i + j) END DO END DO @@ -274,7 +274,7 @@ CONTAINS elapsed_time = end_time - start_time ! Output timing result - write(*,*) "Elapsed time for tdma_solver_vec: ", elapsed_time, " seconds" + WRITE (*, *) "Elapsed time for tdma_solver_vec: ", elapsed_time, " seconds" sum = 0.0_wp DO i = 1, n @@ -288,13 +288,13 @@ CONTAINS x = 0.0_wp #ifndef __USE_CPP_BINDINGS - CALL tdma_solver_vec(a, b, c, d, 2, n-1, 2, n-1, x) + CALL tdma_solver_vec(a, b, c, d, 2, n - 1, 2, n - 1, x) #else - CALL tdma_solver_vec(a, b, c, d, 1, n-1, 1, n-1, n, n, x, -1) + CALL tdma_solver_vec(a, b, c, d, 1, n - 1, 1, n - 1, n, n, x, -1) #endif sum = 0.0_wp - DO i = 2, n-1 - DO j = 2, n-1 + DO i = 2, n - 1 + DO j = 2, n - 1 sum = sum + x(i, j) END DO END DO -- GitLab From 5106ddc1bfd6538a81772780006825af0731f097 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 3 Jan 2025 12:10:24 +0100 Subject: [PATCH 19/50] added licences to the new cpp files --- src/support/mo_lib_loopindices.cpp | 11 +++++++++++ src/support/mo_math_utilities.cpp | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/src/support/mo_lib_loopindices.cpp b/src/support/mo_lib_loopindices.cpp index 9810427..e6d9d21 100644 --- a/src/support/mo_lib_loopindices.cpp +++ b/src/support/mo_lib_loopindices.cpp @@ -1,3 +1,14 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + #include <algorithm> // For std::max extern "C" { diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp index ff94b89..e171606 100644 --- a/src/support/mo_math_utilities.cpp +++ b/src/support/mo_math_utilities.cpp @@ -1,3 +1,14 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + #include <vector> #include <iostream> #include <chrono> // For timing -- GitLab From da4294b4f3abab096810470bb2407307e3049430 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 21 Feb 2025 14:59:55 +0100 Subject: [PATCH 20/50] enabled compilation using Kokkos --- src/support/CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index 7af714e..b4ceb37 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -57,6 +57,9 @@ if(IM_ENABLE_OPENACC) endif() endif() +message(STATUS "iconmath-support enabling Kokkos") +find_package(Kokkos REQUIRED) + target_include_directories( iconmath-support PUBLIC @@ -74,7 +77,7 @@ target_include_directories( # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_BINARY_DIR}>>) -target_link_libraries(iconmath-support PUBLIC fortran-support::fortran-support) +target_link_libraries(iconmath-support PUBLIC fortran-support::fortran-support Kokkos::kokkos) install(TARGETS iconmath-support EXPORT "${PROJECT_NAME}-targets") -- GitLab From e70952a76fba137c4167e023c51be20610f913d1 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 21 Feb 2025 15:00:45 +0100 Subject: [PATCH 21/50] converted the c++ code in mo_math_utilities to Kokkos --- src/support/mo_math_utilities.cpp | 128 ++++++++++++++---------------- 1 file changed, 61 insertions(+), 67 deletions(-) diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp index e171606..b3031a5 100644 --- a/src/support/mo_math_utilities.cpp +++ b/src/support/mo_math_utilities.cpp @@ -12,75 +12,69 @@ #include <vector> #include <iostream> #include <chrono> // For timing +#include <Kokkos_Core.hpp> extern "C" { -void tdma_solver_vec(double *a, double *b, double *c, double *d, - int slev, int elev, int startidx, int endidx, - int nrows, int ncols, double *varout, int opt_acc_queue = -1) { - - // Start timing - auto start_time = std::chrono::high_resolution_clock::now(); - - int acc_queue = (opt_acc_queue == -1) ? 1 : opt_acc_queue; // Use 1 as the default if opt_acc_queue is not provided - - double* cp = new double[nrows * ncols]; - double* dp = new double[nrows * ncols]; - - #define IDX(row, col) ((col) * nrows + (row)) // performs better than lambda function - - // OpenACC Parallel Region - #pragma acc parallel default(present) create(cp[:nrows*ncols], dp[:nrows*ncols]) async(acc_queue) - { - // Initialize c-prime and d-prime - #pragma acc loop gang(static: 1) vector - for (int jc = startidx; jc < endidx; ++jc) { - cp[IDX(jc, slev)] = c[IDX(jc, slev)] / b[IDX(jc, slev)]; - dp[IDX(jc, slev)] = d[IDX(jc, slev)] / b[IDX(jc, slev)]; - } - - // Solve for vectors c-prime and d-prime - #pragma acc loop seq - for (int i = slev + 1; i < elev; ++i) { - #pragma acc loop gang(static: 1) vector - for (int jc = startidx; jc < endidx; ++jc) { - double m = 1.0 / (b[IDX(jc, i)] - cp[IDX(jc, i - 1)] * a[IDX(jc, i)]); - cp[IDX(jc, i)] = c[IDX(jc, i)] * m; - dp[IDX(jc, i)] = (d[IDX(jc, i)] - dp[IDX(jc, i - 1)] * a[IDX(jc, i)]) * m; - } - } - - // Initialize varout - #pragma acc loop gang(static: 1) vector - for (int jc = startidx; jc < endidx; ++jc) { - varout[IDX(jc, elev-1)] = dp[IDX(jc, elev-1)]; - } - - // Solve for varout from the vectors c-prime and d-prime - #pragma acc loop seq - for (int i = elev - 2; i >= slev; --i) { - #pragma acc loop gang(static: 1) vector - for (int jc = startidx; jc < endidx; ++jc) { - varout[IDX(jc, i)] = dp[IDX(jc, i)] - cp[IDX(jc, i)] * varout[IDX(jc, i + 1)]; - } - } - } - - printf("tdma_solver_vec: completed using C++\n"); - - // Wait for OpenACC asynchronous operations to complete if acc_queue is not optional - if (opt_acc_queue == -1) { - #pragma acc wait(acc_queue) - } - - // Free memory at the end - delete[] cp; - delete[] dp; - - // End timing - auto end_time = std::chrono::high_resolution_clock::now(); - std::chrono::duration<double> elapsed_time = end_time - start_time; - - std::cout << "Elapsed time for tdma_solver_vec (C++): " << elapsed_time.count() << " seconds" << std::endl; +void tdma_solver_vec_kokkos(const double* a, const double* b, const double* c, const double* d, + int slev, int elev, int startidx, int endidx, + int nrows, int ncols, double* varout) { + + // Start timing + auto start_time = std::chrono::high_resolution_clock::now(); + + // Allocate temporary arrays using Kokkos::View. + // The views c_p and d_p are allocated as 2D arrays with dimensions [nrows][ncols]. + // Kokkos::View automatically handles memory management. + Kokkos::View<double**> c_p("c_p", nrows, ncols); + Kokkos::View<double**> d_p("d_p", nrows, ncols); + + // Wrap the input arrays in unmanaged views. + // We assume that the input arrays are laid out in column-major order as in the original code. + // Here we use LayoutLeft so that the first index (row) is contiguous. + typedef Kokkos::View<const double**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConst2D; + typedef Kokkos::View<double**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> Unmanaged2D; + UnmanagedConst2D a_view(a, nrows, ncols); + UnmanagedConst2D b_view(b, nrows, ncols); + UnmanagedConst2D c_view(c, nrows, ncols); + UnmanagedConst2D d_view(d, nrows, ncols); + Unmanaged2D varout_view(varout, nrows, ncols); + + // Initialize c-prime and d-prime at the starting level (slev) + Kokkos::parallel_for("init_c_p_d_p", Kokkos::RangePolicy<>(startidx, endidx), KOKKOS_LAMBDA (const int jc) { + c_p(jc, slev) = c_view(jc, slev) / b_view(jc, slev); + d_p(jc, slev) = d_view(jc, slev) / b_view(jc, slev); + }); + Kokkos::fence(); + + // Forward sweep: compute c-prime and d-prime for each column from slev+1 to elev-1. + for (int i = slev + 1; i < elev; ++i) { + Kokkos::parallel_for("forward_sweep", Kokkos::RangePolicy<>(startidx, endidx), KOKKOS_LAMBDA (const int jc) { + double m = 1.0 / (b_view(jc, i) - c_p(jc, i - 1) * a_view(jc, i)); + c_p(jc, i) = c_view(jc, i) * m; + d_p(jc, i) = (d_view(jc, i) - d_p(jc, i - 1) * a_view(jc, i)) * m; + }); + Kokkos::fence(); + } + + // Initialize the output array at the last level (elev-1) + Kokkos::parallel_for("init_varout", Kokkos::RangePolicy<>(startidx, endidx), KOKKOS_LAMBDA (const int jc) { + varout_view(jc, elev-1) = d_p(jc, elev-1); + }); + Kokkos::fence(); + + // Back substitution: update varout for columns from elev-2 down to slev. + for (int i = elev - 2; i >= slev; --i) { + Kokkos::parallel_for("back_substitution", Kokkos::RangePolicy<>(startidx, endidx), KOKKOS_LAMBDA (const int jc) { + varout_view(jc, i) = d_p(jc, i) - c_p(jc, i) * varout_view(jc, i + 1); + }); + Kokkos::fence(); + } + + // End timing and print the elapsed time + auto end_time = std::chrono::high_resolution_clock::now(); + std::chrono::duration<double> elapsed_time = end_time - start_time; + std::cout << "Elapsed time for tdma_solver_vec (Kokkos): " << elapsed_time.count() << " seconds" << std::endl; } + } -- GitLab From 136db1183f9b3bf1ddb317ad2d26c448b7c02a30 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 21 Feb 2025 17:16:59 +0100 Subject: [PATCH 22/50] build kokkos internally along with the package --- CMakeLists.txt | 16 ++++++++++++++++ src/support/CMakeLists.txt | 10 ++++++---- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8fb4acf..ab93b92 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -110,6 +110,22 @@ else() endif() endif() +include(FetchContent) +# configure kokkos 4.4 repository link +FetchContent_Declare(kokkos + URL https://github.com/kokkos/kokkos/releases/download/4.4.01/kokkos-4.4.01.tar.gz + URL_HASH MD5=eafd0d42c9831858aa84fde78576644c +) + +# disable build of C++23 mdspan experimental support for now +set(Kokkos_ENABLE_IMPL_MDSPAN OFF CACHE BOOL "Experimental mdspan support") + +# by default, build the Kokkos serial backend for CPU +set(Kokkos_ENABLE_SERIAL ON CACHE BOOL "Kokkos Serial backend") +set(Kokkos_ARCH_NATIVE ON CACHE BOOL "Kokkos native architecture optimisations") + +FetchContent_MakeAvailable(kokkos) + add_subdirectory(src) # Allow for 'make test' even if the tests are disabled: diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index b4ceb37..9f56017 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -57,9 +57,6 @@ if(IM_ENABLE_OPENACC) endif() endif() -message(STATUS "iconmath-support enabling Kokkos") -find_package(Kokkos REQUIRED) - target_include_directories( iconmath-support PUBLIC @@ -77,7 +74,12 @@ target_include_directories( # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_BINARY_DIR}>>) -target_link_libraries(iconmath-support PUBLIC fortran-support::fortran-support Kokkos::kokkos) +target_link_libraries(iconmath-support + PUBLIC + fortran-support::fortran-support + PRIVATE + Kokkos::kokkos +) install(TARGETS iconmath-support EXPORT "${PROJECT_NAME}-targets") -- GitLab From b865029415ea2421332985d4c27b69e6642ace8b Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 21 Feb 2025 17:27:31 +0100 Subject: [PATCH 23/50] changed cmake format --- CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ab93b92..26137c4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,10 +112,10 @@ endif() include(FetchContent) # configure kokkos 4.4 repository link -FetchContent_Declare(kokkos - URL https://github.com/kokkos/kokkos/releases/download/4.4.01/kokkos-4.4.01.tar.gz - URL_HASH MD5=eafd0d42c9831858aa84fde78576644c -) +FetchContent_Declare( + kokkos + URL https://github.com/kokkos/kokkos/releases/download/4.4.01/kokkos-4.4.01.tar.gz + URL_HASH MD5=eafd0d42c9831858aa84fde78576644c) # disable build of C++23 mdspan experimental support for now set(Kokkos_ENABLE_IMPL_MDSPAN OFF CACHE BOOL "Experimental mdspan support") -- GitLab From d026fe4459e14ce5d7af6d4a2fc8873f3e8b92b9 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 21 Feb 2025 18:01:11 +0100 Subject: [PATCH 24/50] fixed an error in cmake styling --- .cmake-format.py | 1 + CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.cmake-format.py b/.cmake-format.py index 31f47fa..1b19e4e 100644 --- a/.cmake-format.py +++ b/.cmake-format.py @@ -37,3 +37,4 @@ with section("lint"): local_var_pattern = '[a-zA-Z][0-9a-zA-z_]+' private_var_pattern = '[a-z][a-z0-9_]+' public_var_pattern = '[A-Z][0-9a-zA-Z_]+' + global_var_pattern = '[A-Z][0-9a-zA-Z_]+' diff --git a/CMakeLists.txt b/CMakeLists.txt index 26137c4..7cf92be 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -117,7 +117,7 @@ FetchContent_Declare( URL https://github.com/kokkos/kokkos/releases/download/4.4.01/kokkos-4.4.01.tar.gz URL_HASH MD5=eafd0d42c9831858aa84fde78576644c) -# disable build of C++23 mdspan experimental support for now +# disable build of C++23 mdspan experimental support for now set(Kokkos_ENABLE_IMPL_MDSPAN OFF CACHE BOOL "Experimental mdspan support") # by default, build the Kokkos serial backend for CPU -- GitLab From fbae7837c92035d6cd952c0696aa2138a167406d Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 21 Feb 2025 18:04:04 +0100 Subject: [PATCH 25/50] updated the gitignore file --- .gitignore | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.gitignore b/.gitignore index 23535bf..8123bd2 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,12 @@ cmake_install.cmake iconmath-config-version.cmake iconmath-config.cmake iconmath-targets.cmake +KokkosConfig.cmake +KokkosConfigVersion.cmake +KokkosTargets.cmake +KokkosConfigCommon.cmake +Kokkos_Version_Info.cpp +Kokkos_Version_Info.hpp # Build stage files: *.L -- GitLab From f01010898e3732b505372f10332e02b0ad631dca Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Sun, 23 Feb 2025 14:25:53 +0100 Subject: [PATCH 26/50] made the cpp function a templated one --- src/support/mo_math_utilities.F90 | 10 ++++-- src/support/mo_math_utilities.cpp | 31 +++++++++++-------- src/support/mo_math_utilities.hpp | 15 +++++++++ ..._utilities.F90 => test_math_utilities.f90} | 0 4 files changed, 40 insertions(+), 16 deletions(-) create mode 100644 src/support/mo_math_utilities.hpp rename test/fortran/{test_math_utilities.F90 => test_math_utilities.f90} (100%) diff --git a/src/support/mo_math_utilities.F90 b/src/support/mo_math_utilities.F90 index 9c2897f..168ec26 100644 --- a/src/support/mo_math_utilities.F90 +++ b/src/support/mo_math_utilities.F90 @@ -79,7 +79,11 @@ MODULE mo_math_utilities PUBLIC :: line_intersect PUBLIC :: lintersect PUBLIC :: tdma_solver +#ifndef __USE_CPP_BINDINGS PUBLIC :: tdma_solver_vec +#else + PUBLIC :: tdma_solver_vec_double +#endif PUBLIC :: check_orientation ! vertical coordinates routines @@ -242,14 +246,14 @@ CONTAINS ! C++ binding for tdma_solver_vec INTERFACE - SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) & - BIND(C, NAME="tdma_solver_vec") + SUBROUTINE tdma_solver_vec_double(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) & + BIND(C, NAME="tdma_solver_vec_double") IMPORT :: c_double, c_int REAL(c_double), INTENT(IN) :: a(*), b(*), c(*), d(*) INTEGER(c_int), VALUE :: slev, elev, startidx, endidx, nrows, ncols REAL(c_double), INTENT(OUT) :: varout(*) INTEGER(c_int), OPTIONAL :: opt_acc_queue - END SUBROUTINE tdma_solver_vec + END SUBROUTINE tdma_solver_vec_double END INTERFACE CONTAINS diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp index b3031a5..6a60f2c 100644 --- a/src/support/mo_math_utilities.cpp +++ b/src/support/mo_math_utilities.cpp @@ -9,16 +9,12 @@ // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- -#include <vector> -#include <iostream> -#include <chrono> // For timing -#include <Kokkos_Core.hpp> +#include "mo_math_utilities.hpp" -extern "C" { - -void tdma_solver_vec_kokkos(const double* a, const double* b, const double* c, const double* d, +template <typename T> +void tdma_solver_vec(const T* a, const T* b, const T* c, const T* d, int slev, int elev, int startidx, int endidx, - int nrows, int ncols, double* varout) { + int nrows, int ncols, T* varout) { // Start timing auto start_time = std::chrono::high_resolution_clock::now(); @@ -26,14 +22,14 @@ void tdma_solver_vec_kokkos(const double* a, const double* b, const double* c, c // Allocate temporary arrays using Kokkos::View. // The views c_p and d_p are allocated as 2D arrays with dimensions [nrows][ncols]. // Kokkos::View automatically handles memory management. - Kokkos::View<double**> c_p("c_p", nrows, ncols); - Kokkos::View<double**> d_p("d_p", nrows, ncols); + Kokkos::View<T**> c_p("c_p", nrows, ncols); + Kokkos::View<T**> d_p("d_p", nrows, ncols); // Wrap the input arrays in unmanaged views. // We assume that the input arrays are laid out in column-major order as in the original code. // Here we use LayoutLeft so that the first index (row) is contiguous. - typedef Kokkos::View<const double**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConst2D; - typedef Kokkos::View<double**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> Unmanaged2D; + typedef Kokkos::View<const T**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConst2D; + typedef Kokkos::View<T**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> Unmanaged2D; UnmanagedConst2D a_view(a, nrows, ncols); UnmanagedConst2D b_view(b, nrows, ncols); UnmanagedConst2D c_view(c, nrows, ncols); @@ -50,7 +46,7 @@ void tdma_solver_vec_kokkos(const double* a, const double* b, const double* c, c // Forward sweep: compute c-prime and d-prime for each column from slev+1 to elev-1. for (int i = slev + 1; i < elev; ++i) { Kokkos::parallel_for("forward_sweep", Kokkos::RangePolicy<>(startidx, endidx), KOKKOS_LAMBDA (const int jc) { - double m = 1.0 / (b_view(jc, i) - c_p(jc, i - 1) * a_view(jc, i)); + T m = 1.0 / (b_view(jc, i) - c_p(jc, i - 1) * a_view(jc, i)); c_p(jc, i) = c_view(jc, i) * m; d_p(jc, i) = (d_view(jc, i) - d_p(jc, i - 1) * a_view(jc, i)) * m; }); @@ -71,10 +67,19 @@ void tdma_solver_vec_kokkos(const double* a, const double* b, const double* c, c Kokkos::fence(); } + c_p = Kokkos::View<T**>(); + d_p = Kokkos::View<T**>(); // End timing and print the elapsed time auto end_time = std::chrono::high_resolution_clock::now(); std::chrono::duration<double> elapsed_time = end_time - start_time; std::cout << "Elapsed time for tdma_solver_vec (Kokkos): " << elapsed_time.count() << " seconds" << std::endl; } +extern "C" { + + void tdma_solver_vec_double(const double* a, const double* b, const double* c, const double* d, + int slev, int elev, int startidx, int endidx, + int nrows, int ncols, double* varout) { + tdma_solver_vec<double>(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout); + } } diff --git a/src/support/mo_math_utilities.hpp b/src/support/mo_math_utilities.hpp new file mode 100644 index 0000000..20b1f44 --- /dev/null +++ b/src/support/mo_math_utilities.hpp @@ -0,0 +1,15 @@ +#include <vector> +#include <iostream> +#include <chrono> // For timing +#include <Kokkos_Core.hpp> + +template <typename T> +void tdma_solver_vec(const T* a, const T* b, const T* c, const T* d, + int slev, int elev, int startidx, int endidx, + int nrows, int ncols, T* varout); + +extern "C" { + void tdma_solver_vec_double(const double* a, const double* b, const double* c, const double* d, + int slev, int elev, int startidx, int endidx, + int nrows, int ncols, double* varout); +} diff --git a/test/fortran/test_math_utilities.F90 b/test/fortran/test_math_utilities.f90 similarity index 100% rename from test/fortran/test_math_utilities.F90 rename to test/fortran/test_math_utilities.f90 -- GitLab From e46960c7181b86066e142d950123446fe2f17270 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Sun, 23 Feb 2025 14:47:17 +0100 Subject: [PATCH 27/50] added unit-tests for c++ codes using googletest --- test/CMakeLists.txt | 3 +- test/c/CMakeLists.txt | 30 +++++++++++++++ test/c/main.cpp | 14 +++++++ test/c/test_tdma_solver.cpp | 77 +++++++++++++++++++++++++++++++++++++ 4 files changed, 123 insertions(+), 1 deletion(-) create mode 100644 test/c/CMakeLists.txt create mode 100644 test/c/main.cpp create mode 100644 test/c/test_tdma_solver.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c8fa8e2..2a5f5df 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -9,4 +9,5 @@ # SPDX-License-Identifier: BSD-3-Clause # --------------------------------------------------------------- -add_subdirectory(fortran) +# add_subdirectory(fortran) +add_subdirectory(c) diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt new file mode 100644 index 0000000..be1af9e --- /dev/null +++ b/test/c/CMakeLists.txt @@ -0,0 +1,30 @@ +# Fetch GoogleTest via FetchContent +include(FetchContent) +FetchContent_Declare( + googletest + URL https://github.com/google/googletest/archive/refs/tags/release-1.12.1.zip +) +set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) +FetchContent_MakeAvailable(googletest) + +# Find Kokkos (or use your existing Kokkos installation) +# find_package(Kokkos REQUIRED) + +set(SOURCES + main.cpp + test_tdma_solver.cpp +) +# Create the test executable from your test files, including main.cpp. +add_executable(iconmath_test_c ${SOURCES}) + +# Link the test executable with GoogleTest and Kokkos. +target_link_libraries(iconmath_test_c + PUBLIC + iconmath-support + PRIVATE + gtest_main + Kokkos::kokkos +) + +include(GoogleTest) +gtest_discover_tests(iconmath_test_c) diff --git a/test/c/main.cpp b/test/c/main.cpp new file mode 100644 index 0000000..2df720d --- /dev/null +++ b/test/c/main.cpp @@ -0,0 +1,14 @@ +#include <Kokkos_Core.hpp> +#include <gtest/gtest.h> + +int main(int argc, char** argv) { + // Initialize Kokkos before any tests run. + Kokkos::initialize(argc, argv); + + ::testing::InitGoogleTest(&argc, argv); + int result = RUN_ALL_TESTS(); + + // Finalize Kokkos after all tests have completed. + Kokkos::finalize(); + return result; +} diff --git a/test/c/test_tdma_solver.cpp b/test/c/test_tdma_solver.cpp new file mode 100644 index 0000000..7c3c3a8 --- /dev/null +++ b/test/c/test_tdma_solver.cpp @@ -0,0 +1,77 @@ +#include <gtest/gtest.h> +#include <vector> +#include <algorithm> +#include "mo_math_utilities.hpp" + +// Helper function to compute the 1D index for column-major storage. +inline int idx(int i, int j, int nrows) { + return i + j * nrows; +} + +// Test fixture for the TDMA solver tests. +class TDMASolverTestFixture : public ::testing::Test { +protected: + const int n = 10; // Matrix dimension. + std::vector<double> a; // Input matrix a. + std::vector<double> b; // Input matrix b. + std::vector<double> c; // Input matrix c. + std::vector<double> d; // Input matrix d. + std::vector<double> x; // Output matrix. + + TDMASolverTestFixture() + : a(n * n), b(n * n), c(n * n), d(n * n), x(n * n, 0.0) {} + + // SetUp is run before each test. + void SetUp() override { + // Fill arrays in column-major order. + for (int j = 0; j < n; j++) { + for (int i = 0; i < n; i++) { + double value = (i + 1) + (j + 1); + a[idx(i, j, n)] = 1.0 * value; + b[idx(i, j, n)] = 2.0 * value; + c[idx(i, j, n)] = 1.0 * value; + d[idx(i, j, n)] = 1.0 * value; + } + } + // Clear the output vector. + std::fill(x.begin(), x.end(), 0.0); + } +}; + +TEST_F(TDMASolverTestFixture, FullTest) { + // Call the solver over the full range: + tdma_solver_vec_double(a.data(), b.data(), c.data(), d.data(), + 0, n, 0, n, n, n, x.data()); + + // Compute the sum of all elements in the output matrix. + double sum = 0.0; + for (int j = 0; j < n; j++) { + for (int i = 0; i < n; i++) { + sum += x[idx(i, j, n)]; + } + } + + // Expected reference sum + double sum_ref = 27.2727272727272769; + double tol = 1e-13; + EXPECT_NEAR(sum, sum_ref, tol); +} + +TEST_F(TDMASolverTestFixture, PartialTest) { + // Call the solver for a partial region: + // For C++: slev = 1, elev = n-1, startidx = 1, endidx = n-1. + tdma_solver_vec_double(a.data(), b.data(), c.data(), d.data(), + 1, n - 1, 1, n - 1, n, n, x.data()); + + // Compute the sum over a region + double sum = 0.0; + for (int j = 1; j < n - 1; j++) { + for (int i = 1; i < n - 1; i++) { + sum += x[idx(i, j, n)]; + } + } + + double sum_ref = 17.7777777777777679; + double tol = 1e-13; + EXPECT_NEAR(sum, sum_ref, tol); +} -- GitLab From 1b8c2abb9cca5fa5a7163f3d8037fada7ccd0108 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Sun, 23 Feb 2025 21:53:41 +0100 Subject: [PATCH 28/50] added c++ code for mo_lib_interpolation_vector --- src/interpolation/CMakeLists.txt | 11 ++ .../mo_lib_interpolation_vector.cpp | 130 ++++++++++++++++++ .../mo_lib_interpolation_vector.hpp | 38 +++++ src/support/mo_lib_loopindices.hpp | 11 ++ 4 files changed, 190 insertions(+) create mode 100644 src/interpolation/mo_lib_interpolation_vector.cpp create mode 100644 src/interpolation/mo_lib_interpolation_vector.hpp create mode 100644 src/support/mo_lib_loopindices.hpp diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index 73e582c..9455f9e 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -13,6 +13,7 @@ add_library( iconmath-interpolation mo_lib_interpolation_scalar.F90 mo_lib_interpolation_vector.F90 + mo_lib_interpolation_vector.cpp mo_lib_intp_rbf.F90) add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation) @@ -55,10 +56,20 @@ target_include_directories( $<INSTALL_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:$<INSTALL_PREFIX>/${CMAKE_INSTALL_INCLUDEDIR}>> # Path to internal include directory $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:${PROJECT_SOURCE_DIR}/include>> + # Path to the internal C/C++ headers (for testing): Requires CMake 3.15+ for + # multiple compile languages + # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html + $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_SOURCE_DIR}>> + PRIVATE + # Path to config.h (for C and C++ only): Requires CMake 3.15+ for multiple + # compile languages + # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html + $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_BINARY_DIR}>> ) target_link_libraries(iconmath-interpolation PUBLIC fortran-support::fortran-support) target_link_libraries(iconmath-interpolation PUBLIC iconmath-support) +target_link_libraries(iconmath-interpolation PRIVATE Kokkos::kokkos) install(TARGETS iconmath-interpolation EXPORT "${PROJECT_NAME}-targets") diff --git a/src/interpolation/mo_lib_interpolation_vector.cpp b/src/interpolation/mo_lib_interpolation_vector.cpp new file mode 100644 index 0000000..50772e7 --- /dev/null +++ b/src/interpolation/mo_lib_interpolation_vector.cpp @@ -0,0 +1,130 @@ +#include "mo_lib_loopindices.hpp" +#include "mo_lib_interpolation_vector.hpp" + +// The templated C++ function using Kokkos. +// Raw pointer arguments are wrapped into unmanaged Kokkos::Views. +// Note: The dimensions below must match the Fortran arrays. +// - p_vn_in and p_vt_in: dimensions [nproma, nlev, nblks_e] +// - cell_edge_idx and cell_edge_blk: dimensions [nproma, nblks_c, 3] +// - e_bln_c_u and e_bln_c_v: dimensions [nproma, 6, nblks_c] +// - p_u_out and p_v_out: dimensions [nproma, nlev, nblks_c] +template <typename T> +void edges2cells_vector_lib( + const T* p_vn_in, const T* p_vt_in, + const int* cell_edge_idx, const int* cell_edge_blk, + const T* e_bln_c_u, const T* e_bln_c_v, + T* p_u_out, T* p_v_out, + // Additional integer parameters. + int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, + int slev, int elev, + int nproma, + // Dimensions for the arrays. + int nlev, int nblks_e, int nblks_c) +{ + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + + UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e); + UnmanagedConstT3D p_vt_in_view(p_vt_in, nproma, nlev, nblks_e); + + UnmanagedConstInt3D cell_edge_idx_view(cell_edge_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D cell_edge_blk_view(cell_edge_blk, nproma, nblks_c, 3); + + UnmanagedConstT3D e_bln_c_u_view(e_bln_c_u, nproma, 6, nblks_c); + UnmanagedConstT3D e_bln_c_v_view(e_bln_c_v, nproma, 6, nblks_c); + + UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblks_c); + UnmanagedT3D p_v_out_view(p_v_out, nproma, nlev, nblks_c); + + // Loop over cell blocks as in the original Fortran code. + for (int jb = i_startblk; jb <= i_endblk; ++jb) { + // Call get_indices_c_lib to get inner loop indices for block jb. + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, + jb, i_startblk, i_endblk, + i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + Kokkos::parallel_for("edges2cells_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + // Compute the bilinear interpolation for cell (jc, jk, jb). + p_u_out_view(jc, jk, jb) = + e_bln_c_u_view(jc, 0, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + + e_bln_c_u_view(jc, 1, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + + e_bln_c_u_view(jc, 2, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + + e_bln_c_u_view(jc, 3, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + + e_bln_c_u_view(jc, 4, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1) + + e_bln_c_u_view(jc, 5, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1); + + p_v_out_view(jc, jk, jb) = + e_bln_c_v_view(jc, 0, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + + e_bln_c_v_view(jc, 1, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + + e_bln_c_v_view(jc, 2, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + + e_bln_c_v_view(jc, 3, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + + e_bln_c_v_view(jc, 4, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1) + + e_bln_c_v_view(jc, 5, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1); + }); + // Optionally fence after each block if required. + Kokkos::fence(); + } +} + +extern "C" void edges2cells_vector_lib_dp( + const double* p_vn_in, const double* p_vt_in, + const int* cell_edge_idx, const int* cell_edge_blk, + const double* e_bln_c_u, const double* e_bln_c_v, + double* p_u_out, double* p_v_out, + int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, + int slev, int elev, + int nproma, + int nlev, int nblks_e, int nblks_c) +{ + edges2cells_vector_lib<double>(p_vn_in, p_vt_in, + cell_edge_idx, cell_edge_blk, + e_bln_c_u, e_bln_c_v, + p_u_out, p_v_out, + i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, + nproma, + nlev, nblks_e, nblks_c); +} + +extern "C" void edges2cells_vector_lib_sp( + const float* p_vn_in, const float* p_vt_in, + const int* cell_edge_idx, const int* cell_edge_blk, + const float* e_bln_c_u, const float* e_bln_c_v, + float* p_u_out, float* p_v_out, + int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, + int slev, int elev, + int nproma, + int nlev, int nblks_e, int nblks_c) +{ + edges2cells_vector_lib<float>(p_vn_in, p_vt_in, + cell_edge_idx, cell_edge_blk, + e_bln_c_u, e_bln_c_v, + p_u_out, p_v_out, + i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, + nproma, + nlev, nblks_e, nblks_c); +} diff --git a/src/interpolation/mo_lib_interpolation_vector.hpp b/src/interpolation/mo_lib_interpolation_vector.hpp new file mode 100644 index 0000000..a764ada --- /dev/null +++ b/src/interpolation/mo_lib_interpolation_vector.hpp @@ -0,0 +1,38 @@ +#include <Kokkos_Core.hpp> +#include <vector> + +template <typename T> +void edges2cells_vector_lib( + const T* p_vn_in, const T* p_vt_in, + const int* cell_edge_idx, const int* cell_edge_blk, + const T* e_bln_c_u, const T* e_bln_c_v, + T* p_u_out, T* p_v_out, + // Additional integer parameters. + int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, + int slev, int elev, + int nproma, + // Dimensions for the arrays. + int nlev, int nblks_e, int nblks_c); + +extern "C" void edges2cells_vector_lib_dp( + const double* p_vn_in, const double* p_vt_in, + const int* cell_edge_idx, const int* cell_edge_blk, + const double* e_bln_c_u, const double* e_bln_c_v, + double* p_u_out, double* p_v_out, + int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, + int slev, int elev, + int nproma, + int nlev, int nblks_e, int nblks_c); + +extern "C" void edges2cells_vector_lib_sp( + const float* p_vn_in, const float* p_vt_in, + const int* cell_edge_idx, const int* cell_edge_blk, + const float* e_bln_c_u, const float* e_bln_c_v, + float* p_u_out, float* p_v_out, + int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, + int slev, int elev, + int nproma, + int nlev, int nblks_e, int nblks_c); diff --git a/src/support/mo_lib_loopindices.hpp b/src/support/mo_lib_loopindices.hpp new file mode 100644 index 0000000..d53aa38 --- /dev/null +++ b/src/support/mo_lib_loopindices.hpp @@ -0,0 +1,11 @@ +extern "C" { + // get_indices_c_lib function + void get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out); + + void get_indices_e_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out); + + void get_indices_v_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out); +} -- GitLab From e2411d5dd5375c955f179a39e54596bb8685e10a Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 24 Feb 2025 07:20:00 +0100 Subject: [PATCH 29/50] added license to the new files --- src/interpolation/mo_lib_interpolation_vector.cpp | 11 +++++++++++ src/interpolation/mo_lib_interpolation_vector.hpp | 11 +++++++++++ src/support/mo_lib_loopindices.hpp | 11 +++++++++++ src/support/mo_math_utilities.hpp | 11 +++++++++++ test/c/CMakeLists.txt | 13 +++++++++++++ test/c/main.cpp | 11 +++++++++++ test/c/test_tdma_solver.cpp | 11 +++++++++++ 7 files changed, 79 insertions(+) diff --git a/src/interpolation/mo_lib_interpolation_vector.cpp b/src/interpolation/mo_lib_interpolation_vector.cpp index 50772e7..00a914a 100644 --- a/src/interpolation/mo_lib_interpolation_vector.cpp +++ b/src/interpolation/mo_lib_interpolation_vector.cpp @@ -1,3 +1,14 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + #include "mo_lib_loopindices.hpp" #include "mo_lib_interpolation_vector.hpp" diff --git a/src/interpolation/mo_lib_interpolation_vector.hpp b/src/interpolation/mo_lib_interpolation_vector.hpp index a764ada..0d19b24 100644 --- a/src/interpolation/mo_lib_interpolation_vector.hpp +++ b/src/interpolation/mo_lib_interpolation_vector.hpp @@ -1,3 +1,14 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + #include <Kokkos_Core.hpp> #include <vector> diff --git a/src/support/mo_lib_loopindices.hpp b/src/support/mo_lib_loopindices.hpp index d53aa38..03eb977 100644 --- a/src/support/mo_lib_loopindices.hpp +++ b/src/support/mo_lib_loopindices.hpp @@ -1,3 +1,14 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + extern "C" { // get_indices_c_lib function void get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, diff --git a/src/support/mo_math_utilities.hpp b/src/support/mo_math_utilities.hpp index 20b1f44..4ee5dc9 100644 --- a/src/support/mo_math_utilities.hpp +++ b/src/support/mo_math_utilities.hpp @@ -1,3 +1,14 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + #include <vector> #include <iostream> #include <chrono> // For timing diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index be1af9e..52225a7 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -1,3 +1,14 @@ +# ICON +# +# --------------------------------------------------------------- +# Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +# Contact information: icon-model.org +# +# See AUTHORS.TXT for a list of authors +# See LICENSES/ for license information +# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------- + # Fetch GoogleTest via FetchContent include(FetchContent) FetchContent_Declare( @@ -13,6 +24,7 @@ FetchContent_MakeAvailable(googletest) set(SOURCES main.cpp test_tdma_solver.cpp + test_interpolation_vector.cpp ) # Create the test executable from your test files, including main.cpp. add_executable(iconmath_test_c ${SOURCES}) @@ -21,6 +33,7 @@ add_executable(iconmath_test_c ${SOURCES}) target_link_libraries(iconmath_test_c PUBLIC iconmath-support + iconmath-interpolation PRIVATE gtest_main Kokkos::kokkos diff --git a/test/c/main.cpp b/test/c/main.cpp index 2df720d..bd0fadc 100644 --- a/test/c/main.cpp +++ b/test/c/main.cpp @@ -1,3 +1,14 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + #include <Kokkos_Core.hpp> #include <gtest/gtest.h> diff --git a/test/c/test_tdma_solver.cpp b/test/c/test_tdma_solver.cpp index 7c3c3a8..8f120ef 100644 --- a/test/c/test_tdma_solver.cpp +++ b/test/c/test_tdma_solver.cpp @@ -1,3 +1,14 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + #include <gtest/gtest.h> #include <vector> #include <algorithm> -- GitLab From dab58c8117fdd4db89a7cdbad96b7690556540ff Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 24 Feb 2025 07:23:09 +0100 Subject: [PATCH 30/50] fixed a bug in cmake style --- test/c/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index 52225a7..95ca08b 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -15,7 +15,7 @@ FetchContent_Declare( googletest URL https://github.com/google/googletest/archive/refs/tags/release-1.12.1.zip ) -set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) +# set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) FetchContent_MakeAvailable(googletest) # Find Kokkos (or use your existing Kokkos installation) -- GitLab From 29625285aa26ac3b1fdae7861e6765e1f7843bb1 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 24 Feb 2025 07:25:39 +0100 Subject: [PATCH 31/50] added test for interpolation_vector --- test/c/test_interpolation_vector.cpp | 123 +++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 test/c/test_interpolation_vector.cpp diff --git a/test/c/test_interpolation_vector.cpp b/test/c/test_interpolation_vector.cpp new file mode 100644 index 0000000..dc70a63 --- /dev/null +++ b/test/c/test_interpolation_vector.cpp @@ -0,0 +1,123 @@ +#include <gtest/gtest.h> +#include <Kokkos_Core.hpp> +#include <vector> +#include "mo_lib_interpolation_vector.hpp" + +// Dimensions for the test (small, trivial test). +// We assume Fortran ordering: column-major, but our C wrappers will wrap raw pointers into Kokkos::Views with LayoutLeft. +constexpr int nproma = 2; +constexpr int nlev = 3; +constexpr int nblks_e = 2; // For the edge arrays (p_vn_in, p_vt_in) +constexpr int nblks_c = 2; // For the cell arrays and interpolation coefficients + +// For the get_indices_c_lib inputs. +constexpr int i_startblk = 0; +constexpr int i_endblk = 1; // two blocks: indices 0 and 1 +constexpr int i_startidx_in = 0; +constexpr int i_endidx_in = nproma - 1; // 0 and 1 +constexpr int slev = 0; +constexpr int elev = nlev - 1; // 0 .. 2 + +// Helper to compute total number of elements for a 3D array stored in column-major order. +template<typename T> +size_t num_elements(int dim1, int dim2, int dim3) { + return static_cast<size_t>(dim1) * dim2 * dim3; +} + +// Test for the double precision (dp) version. +TEST(Edges2CellsTest, DPTest) { + // Allocate and fill input arrays. + std::vector<double> p_vn_in(num_elements<double>(nproma, nlev, nblks_e), 1.0); + std::vector<double> p_vt_in(num_elements<double>(nproma, nlev, nblks_e), 1.0); + // cell_edge_idx and cell_edge_blk: dimensions [nproma, nblks_c, 3] + std::vector<int> cell_edge_idx(num_elements<int>(nproma, nblks_c, 3), 1); + std::vector<int> cell_edge_blk(num_elements<int>(nproma, nblks_c, 3), 1); + + // Here we set cell_edge_idx to 1, 2, 1 for every triple. + for (int i = 0; i < num_elements<int>(nproma, nblks_c, 3); i += 3) { + cell_edge_idx[i] = 1; + cell_edge_idx[i+1] = 2; + cell_edge_idx[i+2] = 1; + } + // Similarly, set cell_edge_blk to all ones (valid since nblks_e=2, so index 1 means block 0 after subtracting 1). + // e_bln_c_u and e_bln_c_v: dimensions [nproma, 6, nblks_c] + std::vector<double> e_bln_c_u(num_elements<double>(nproma, 6, nblks_c), 1.0); + std::vector<double> e_bln_c_v(num_elements<double>(nproma, 6, nblks_c), 1.0); + // Output arrays: dimensions [nproma, nlev, nblks_c] + std::vector<double> p_u_out(num_elements<double>(nproma, nlev, nblks_c), 0.0); + std::vector<double> p_v_out(num_elements<double>(nproma, nlev, nblks_c), 0.0); + + std::vector<double> p_u_ref(num_elements<double>(nproma, nlev, nblks_c), 6.0); + std::vector<double> p_v_ref(num_elements<double>(nproma, nlev, nblks_c), 6.0); + + // Call the dp (double precision) version. + edges2cells_vector_lib_dp( + p_vn_in.data(), p_vt_in.data(), + cell_edge_idx.data(), cell_edge_blk.data(), + e_bln_c_u.data(), e_bln_c_v.data(), + p_u_out.data(), p_v_out.data(), + i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, + nproma, + nlev, nblks_e, nblks_c); + + // Check that for each computed cell in p_u_out and p_v_out, the value is 6. + // This is because for each cell, the kernel adds 6 terms of 1*1. + p_u_ref[0] = 0.0; + p_u_ref[8] = 0.0; + p_u_ref[10] = 0.0; + p_v_ref[0] = 0.0; + p_v_ref[8] = 0.0; + p_v_ref[10] = 0.0; + for (size_t idx = 0; idx < p_u_out.size(); ++idx) { + EXPECT_NEAR(p_u_out[idx], p_u_ref[idx], 1e-12); + EXPECT_NEAR(p_v_out[idx], p_v_ref[idx], 1e-12); + } +} + +// Test for the single precision (sp) version. +TEST(Edges2CellsTest, SPTest) { + // Allocate and fill input arrays. + std::vector<float> p_vn_in(num_elements<float>(nproma, nlev, nblks_e), 1.0f); + std::vector<float> p_vt_in(num_elements<float>(nproma, nlev, nblks_e), 1.0f); + std::vector<int> cell_edge_idx(num_elements<int>(nproma, nblks_c, 3), 1); + std::vector<int> cell_edge_blk(num_elements<int>(nproma, nblks_c, 3), 1); + // Set cell_edge_idx values to 1, 2, 1. + for (int i = 0; i < num_elements<int>(nproma, nblks_c, 3); i += 3) { + cell_edge_idx[i] = 1; + cell_edge_idx[i+1] = 2; + cell_edge_idx[i+2] = 1; + } + std::vector<float> e_bln_c_u(num_elements<float>(nproma, 6, nblks_c), 1.0f); + std::vector<float> e_bln_c_v(num_elements<float>(nproma, 6, nblks_c), 1.0f); + std::vector<float> p_u_out(num_elements<float>(nproma, nlev, nblks_c), 0.0f); + std::vector<float> p_v_out(num_elements<float>(nproma, nlev, nblks_c), 0.0f); + + std::vector<float> p_u_ref(num_elements<float>(nproma, nlev, nblks_c), 6.0f); + std::vector<float> p_v_ref(num_elements<float>(nproma, nlev, nblks_c), 6.0f); + + // Call the sp (float precision) version. + edges2cells_vector_lib_sp( + p_vn_in.data(), p_vt_in.data(), + cell_edge_idx.data(), cell_edge_blk.data(), + e_bln_c_u.data(), e_bln_c_v.data(), + p_u_out.data(), p_v_out.data(), + i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, + nproma, + nlev, nblks_e, nblks_c); + + p_u_ref[0] = 0.0f; + p_u_ref[8] = 0.0f; + p_u_ref[10] = 0.0f; + p_v_ref[0] = 0.0f; + p_v_ref[8] = 0.0f; + p_v_ref[10] = 0.0f; + // Verify that every computed output equals 6. + for (size_t idx = 0; idx < p_u_out.size(); ++idx) { + EXPECT_NEAR(p_u_out[idx], p_u_ref[idx], 1e-5f); + EXPECT_NEAR(p_v_out[idx], p_v_ref[idx], 1e-5f); + } +} -- GitLab From 17c9c507858ff8f99036b4be34b0ebd8a139a7ee Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 24 Feb 2025 07:27:21 +0100 Subject: [PATCH 32/50] added licence to the new test file --- test/c/test_interpolation_vector.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/c/test_interpolation_vector.cpp b/test/c/test_interpolation_vector.cpp index dc70a63..a33673c 100644 --- a/test/c/test_interpolation_vector.cpp +++ b/test/c/test_interpolation_vector.cpp @@ -1,3 +1,14 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + #include <gtest/gtest.h> #include <Kokkos_Core.hpp> #include <vector> -- GitLab From b9b268efd076d905596d78b0760cd364d4306419 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 24 Feb 2025 09:25:16 +0100 Subject: [PATCH 33/50] modified CMakeLists.txt of horizontal to enable compilation of C++ code that uses Kokkos --- src/horizontal/CMakeLists.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt index 7515842..6cebed9 100644 --- a/src/horizontal/CMakeLists.txt +++ b/src/horizontal/CMakeLists.txt @@ -52,11 +52,21 @@ target_include_directories( # Path to the Fortran modules: $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:${Fortran_MODULE_DIRECTORY}>> $<INSTALL_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:$<INSTALL_PREFIX>/${CMAKE_INSTALL_INCLUDEDIR}>> + # Path to the internal C/C++ headers (for testing): Requires CMake 3.15+ for + # multiple compile languages + # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html + $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_SOURCE_DIR}>> + PRIVATE + # Path to config.h (for C and C++ only): Requires CMake 3.15+ for multiple + # compile languages + # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html + $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_BINARY_DIR}>> ) target_link_libraries(iconmath-horizontal PUBLIC fortran-support::fortran-support) target_link_libraries(iconmath-horizontal PUBLIC iconmath-support) target_link_libraries(iconmath-horizontal PUBLIC iconmath-interpolation) +target_link_libraries(iconmath-interpolation PRIVATE Kokkos::kokkos) install(TARGETS iconmath-horizontal EXPORT "${PROJECT_NAME}-targets") -- GitLab From 01cbeda7dca297ee6fc738c7207b55e995d2e879 Mon Sep 17 00:00:00 2001 From: Georgiana Mania <mania@dkrz.de> Date: Mon, 24 Feb 2025 12:32:00 +0000 Subject: [PATCH 34/50] configure compilation for kokkos + nvidia (icon-libraries/libiconmath!31) Merged-by: Pradipta Samanta <samanta@dkrz.de> Changelog: default --- CMakeLists.txt | 11 +++++++++++ src/horizontal/CMakeLists.txt | 3 ++- src/interpolation/CMakeLists.txt | 1 + src/support/CMakeLists.txt | 1 + 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7cf92be..affedaa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,6 +29,7 @@ option(IM_USE_CPP_BINDINGS "Use C++ bindings" OFF) option(IM_ENABLE_DIM_SWAP "Enable dimension swap" OFF) option(IM_ENABLE_OPENACC "Enable OpenACC support" OFF) option(IM_ENABLE_OPENMP "Enable OpenMP support" OFF) +set(IM_ENABLE_GPU OFF CACHE STRING "Enable Kokkos GPU support for arch. Valid values: OFF, nvidia-sm80") # GNUInstallDirs issues a warning if CMAKE_SIZEOF_VOID_P is not defined, which # is the case with NAG. One way to circumvent that is to enable C language for @@ -124,6 +125,16 @@ set(Kokkos_ENABLE_IMPL_MDSPAN OFF CACHE BOOL "Experimental mdspan support") set(Kokkos_ENABLE_SERIAL ON CACHE BOOL "Kokkos Serial backend") set(Kokkos_ARCH_NATIVE ON CACHE BOOL "Kokkos native architecture optimisations") +if ("${IM_ENABLE_GPU}" STREQUAL "nvidia-sm80") + # NVIDIA A100 + set(Kokkos_ENABLE_CUDA ON CACHE BOOL "Kokkos CUDA backend") + set(Kokkos_ARCH_AMPERE80 ON CACHE BOOL "CUDA architecture: Ampere cc80") +endif() + +if (${IM_ENABLE_OPENMP}) + set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "Kokkos OpenMP backend") +endif() + FetchContent_MakeAvailable(kokkos) add_subdirectory(src) diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt index 6cebed9..d2abe7d 100644 --- a/src/horizontal/CMakeLists.txt +++ b/src/horizontal/CMakeLists.txt @@ -66,7 +66,8 @@ target_include_directories( target_link_libraries(iconmath-horizontal PUBLIC fortran-support::fortran-support) target_link_libraries(iconmath-horizontal PUBLIC iconmath-support) target_link_libraries(iconmath-horizontal PUBLIC iconmath-interpolation) -target_link_libraries(iconmath-interpolation PRIVATE Kokkos::kokkos) +target_link_libraries(iconmath-horizontal PRIVATE Kokkos::kokkos) +set_target_properties(iconmath-horizontal PROPERTIES LINKER_LANGUAGE Fortran) install(TARGETS iconmath-horizontal EXPORT "${PROJECT_NAME}-targets") diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index 9455f9e..346aaaa 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -70,6 +70,7 @@ target_include_directories( target_link_libraries(iconmath-interpolation PUBLIC fortran-support::fortran-support) target_link_libraries(iconmath-interpolation PUBLIC iconmath-support) target_link_libraries(iconmath-interpolation PRIVATE Kokkos::kokkos) +set_target_properties(iconmath-interpolation PROPERTIES LINKER_LANGUAGE Fortran) install(TARGETS iconmath-interpolation EXPORT "${PROJECT_NAME}-targets") diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index 9f56017..ed6a4d3 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -80,6 +80,7 @@ target_link_libraries(iconmath-support PRIVATE Kokkos::kokkos ) +set_target_properties(iconmath-support PROPERTIES LINKER_LANGUAGE Fortran) install(TARGETS iconmath-support EXPORT "${PROJECT_NAME}-targets") -- GitLab From c61fe00d6c0e99ce97afc9877625c3e6034fad01 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 24 Feb 2025 21:57:30 +0100 Subject: [PATCH 35/50] added openacc_fortran_options only for the compilation of Fortran codes fixed a cmake style added to two other components --- src/horizontal/CMakeLists.txt | 4 +++- src/interpolation/CMakeLists.txt | 3 ++- src/support/CMakeLists.txt | 4 +++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt index d2abe7d..078a14d 100644 --- a/src/horizontal/CMakeLists.txt +++ b/src/horizontal/CMakeLists.txt @@ -39,7 +39,9 @@ if(IM_ENABLE_OPENACC) # If _OPENACC is defined, assume that the required compiler flags are already # provided, e.g. in CMAKE_Fortran_FLAGS: if(NOT HAS_OPENACC_MACRO) - target_compile_options(iconmath-horizontal PRIVATE ${OpenACC_Fortran_OPTIONS}) + target_compile_options(iconmath-horizontal + PRIVATE + $<$<COMPILE_LANGUAGE:Fortran>:${OpenACC_Fortran_OPTIONS}>) # This make sures that unit tests (FortUTF) compiles without the need of # passing OpenACC compile option. target_link_libraries(iconmath-horizontal PRIVATE OpenACC::OpenACC_Fortran) diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index 346aaaa..f982f3b 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -41,7 +41,8 @@ if(IM_ENABLE_OPENACC) # provided, e.g. in CMAKE_Fortran_FLAGS: if(NOT HAS_OPENACC_MACRO) target_compile_options(iconmath-interpolation - PRIVATE ${OpenACC_Fortran_OPTIONS}) + PRIVATE + $<$<COMPILE_LANGUAGE:Fortran>:${OpenACC_Fortran_OPTIONS}>) # This make sures that unit tests (FortUTF) compiles without the need of # passing OpenACC compile option. target_link_libraries(iconmath-interpolation PRIVATE OpenACC::OpenACC_Fortran) diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index ed6a4d3..e78fc16 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -50,7 +50,9 @@ if(IM_ENABLE_OPENACC) # If _OPENACC is defined, assume that the required compiler flags are already # provided, e.g. in CMAKE_Fortran_FLAGS: if(NOT HAS_OPENACC_MACRO) - target_compile_options(iconmath-support PRIVATE ${OpenACC_Fortran_OPTIONS}) + target_compile_options(iconmath-support + PRIVATE + $<$<COMPILE_LANGUAGE:Fortran>:${OpenACC_Fortran_OPTIONS}>) # This make sures that unit tests (FortUTF) compiles without the need of # passing OpenACC compile option. target_link_libraries(iconmath-support PRIVATE OpenACC::OpenACC_Fortran) -- GitLab From e4efb896b8f7e7f092bcd3664e400a7e57efb9cc Mon Sep 17 00:00:00 2001 From: Harshada Balasubramanian <harshada.balasubramanian@mpimet.mpg.de> Date: Mon, 24 Feb 2025 21:30:54 +0000 Subject: [PATCH 36/50] Added a new argument to the functions of `mo_lib_loopindices.cpp` to fix a bug regarding startindex (icon-libraries/libiconmath!32) This made the code to produce bit-identical results for both Fortran and C++ Co-authored-by: Pradipta Samanta <samanta@dkrz.de> Merged-by: Pradipta Samanta <samanta@dkrz.de> Changelog: bigfix --- src/support/CMakeLists.txt | 3 +- src/support/mo_lib_loopindices.cpp | 100 +++++++++++++++++---------- src/support/mo_lib_loopindices.hpp | 20 +++--- src/support/mo_math_utilities.F90 | 8 +-- src/support/mo_math_utilities.cpp | 16 +++-- src/support/mo_math_utilities.hpp | 8 +-- src/support/support_bindings.cpp | 50 ++++++++++++++ src/support/support_bindings.h | 33 +++++++++ test/c/test_interpolation_vector.cpp | 12 ---- test/c/test_tdma_solver.cpp | 4 +- 10 files changed, 176 insertions(+), 78 deletions(-) create mode 100644 src/support/support_bindings.cpp create mode 100644 src/support/support_bindings.h diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index e78fc16..44f60aa 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -19,7 +19,8 @@ add_library( mo_math_types.f90 mo_math_utilities.cpp mo_math_utilities.F90 - mo_random_number_generators.F90) + mo_random_number_generators.F90 + support_bindings.cpp) add_library(${PROJECT_NAME}::support ALIAS iconmath-support) diff --git a/src/support/mo_lib_loopindices.cpp b/src/support/mo_lib_loopindices.cpp index e6d9d21..30c82bd 100644 --- a/src/support/mo_lib_loopindices.cpp +++ b/src/support/mo_lib_loopindices.cpp @@ -11,47 +11,75 @@ #include <algorithm> // For std::max -extern "C" { - // get_indices_c_lib function - void get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, - int &i_startidx_out, int &i_endidx_out) { - if (i_blk == i_startblk) { - i_startidx_out = std::max(1, i_startidx_in); - i_endidx_out = nproma; - if (i_blk == i_endblk) { - i_endidx_out = i_endidx_in; - } - } else if (i_blk == i_endblk) { - i_startidx_out = 1; +// get_indices_c_lib function +void get_indices_c_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, + const int i_blk, const int i_startblk, const int i_endblk, + int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true) { + + //Since code is ported incrementally from Fortran to C++, depending on where the function is called from + //(either fortran or c++), the first index should be either 0 or 1. + int first_index; + if (called_from_cpp) + first_index = 0; + else + first_index = 1; + + if (i_blk == i_startblk) { + i_startidx_out = std::max(first_index, i_startidx_in); + i_endidx_out = nproma; + if (i_blk == i_endblk) { i_endidx_out = i_endidx_in; - } else { - i_startidx_out = 1; - i_endidx_out = nproma; } + } else if (i_blk == i_endblk) { + i_startidx_out = first_index; + i_endidx_out = i_endidx_in; + } else { + i_startidx_out = first_index; + i_endidx_out = nproma; } +} - // get_indices_e_lib function - void get_indices_e_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, - int &i_startidx_out, int &i_endidx_out) { - i_startidx_out = (i_blk != i_startblk) ? 1 : std::max(1, i_startidx_in); - i_endidx_out = (i_blk != i_endblk) ? nproma : i_endidx_in; - } +// get_indices_e_lib function +void get_indices_e_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, + const int i_blk, const int i_startblk, const int i_endblk, + int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true) { + + //Since code is ported incrementally from Fortran to C++, depending on where the function is called from, + //the first index should be either 0 or 1. + int first_index; + if (called_from_cpp) + first_index = 0; + else + first_index = 1; + + i_startidx_out = (i_blk != i_startblk) ? first_index : std::max(first_index, i_startidx_in); + i_endidx_out = (i_blk != i_endblk) ? nproma : i_endidx_in; +} + +// get_indices_v_lib function +void get_indices_v_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, + const int i_blk, const int i_startblk, const int i_endblk, + int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true) { + + //Since code is ported incrementally from Fortran to C++, depending on where the function is called from, + //the first index should be either 0 or 1. + int first_index; + if (called_from_cpp) + first_index = 0; + else + first_index = 1; - // get_indices_v_lib function - void get_indices_v_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, - int &i_startidx_out, int &i_endidx_out) { - if (i_blk == i_startblk) { - i_startidx_out = i_startidx_in; - i_endidx_out = nproma; - if (i_blk == i_endblk) { - i_endidx_out = i_endidx_in; - } - } else if (i_blk == i_endblk) { - i_startidx_out = 1; + if (i_blk == i_startblk) { + i_startidx_out = i_startidx_in; + i_endidx_out = nproma; + if (i_blk == i_endblk) { i_endidx_out = i_endidx_in; - } else { - i_startidx_out = 1; - i_endidx_out = nproma; } + } else if (i_blk == i_endblk) { + i_startidx_out = first_index; + i_endidx_out = i_endidx_in; + } else { + i_startidx_out = first_index; + i_endidx_out = nproma; } -} +} \ No newline at end of file diff --git a/src/support/mo_lib_loopindices.hpp b/src/support/mo_lib_loopindices.hpp index 03eb977..5136c6a 100644 --- a/src/support/mo_lib_loopindices.hpp +++ b/src/support/mo_lib_loopindices.hpp @@ -8,15 +8,17 @@ // See LICENSES/ for license information // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- +#pragma once -extern "C" { - // get_indices_c_lib function - void get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, - int &i_startidx_out, int &i_endidx_out); +void get_indices_c_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, + const int i_blk, const int i_startblk, const int i_endblk, + int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true); - void get_indices_e_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, - int &i_startidx_out, int &i_endidx_out); +void get_indices_e_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, + const int i_blk, const int i_startblk, const int i_endblk, + int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true); - void get_indices_v_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, - int &i_startidx_out, int &i_endidx_out); -} +void get_indices_v_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, + const int i_blk, const int i_startblk, const int i_endblk, + int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true); + \ No newline at end of file diff --git a/src/support/mo_math_utilities.F90 b/src/support/mo_math_utilities.F90 index 168ec26..d1cf120 100644 --- a/src/support/mo_math_utilities.F90 +++ b/src/support/mo_math_utilities.F90 @@ -82,7 +82,7 @@ MODULE mo_math_utilities #ifndef __USE_CPP_BINDINGS PUBLIC :: tdma_solver_vec #else - PUBLIC :: tdma_solver_vec_double + PUBLIC :: tdma_solver_vec_dp #endif PUBLIC :: check_orientation @@ -246,14 +246,14 @@ CONTAINS ! C++ binding for tdma_solver_vec INTERFACE - SUBROUTINE tdma_solver_vec_double(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) & - BIND(C, NAME="tdma_solver_vec_double") + SUBROUTINE tdma_solver_vec_dp(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) & + BIND(C, NAME="tdma_solver_vec_dp") IMPORT :: c_double, c_int REAL(c_double), INTENT(IN) :: a(*), b(*), c(*), d(*) INTEGER(c_int), VALUE :: slev, elev, startidx, endidx, nrows, ncols REAL(c_double), INTENT(OUT) :: varout(*) INTEGER(c_int), OPTIONAL :: opt_acc_queue - END SUBROUTINE tdma_solver_vec_double + END SUBROUTINE tdma_solver_vec_dp END INTERFACE CONTAINS diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp index 6a60f2c..5859b9d 100644 --- a/src/support/mo_math_utilities.cpp +++ b/src/support/mo_math_utilities.cpp @@ -75,11 +75,13 @@ void tdma_solver_vec(const T* a, const T* b, const T* c, const T* d, std::cout << "Elapsed time for tdma_solver_vec (Kokkos): " << elapsed_time.count() << " seconds" << std::endl; } -extern "C" { +template +void tdma_solver_vec<double>(const double* a, const double* b, const double* c, const double* d, + int slev, int elev, int startidx, int endidx, + int nrows, int ncols, double* varout); + +template +void tdma_solver_vec<float>(const float* a, const float* b, const float* c, const float* d, + int slev, int elev, int startidx, int endidx, + int nrows, int ncols, float* varout); - void tdma_solver_vec_double(const double* a, const double* b, const double* c, const double* d, - int slev, int elev, int startidx, int endidx, - int nrows, int ncols, double* varout) { - tdma_solver_vec<double>(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout); - } -} diff --git a/src/support/mo_math_utilities.hpp b/src/support/mo_math_utilities.hpp index 4ee5dc9..a3f3ba1 100644 --- a/src/support/mo_math_utilities.hpp +++ b/src/support/mo_math_utilities.hpp @@ -17,10 +17,4 @@ template <typename T> void tdma_solver_vec(const T* a, const T* b, const T* c, const T* d, int slev, int elev, int startidx, int endidx, - int nrows, int ncols, T* varout); - -extern "C" { - void tdma_solver_vec_double(const double* a, const double* b, const double* c, const double* d, - int slev, int elev, int startidx, int endidx, - int nrows, int ncols, double* varout); -} + int nrows, int ncols, T* varout); \ No newline at end of file diff --git a/src/support/support_bindings.cpp b/src/support/support_bindings.cpp new file mode 100644 index 0000000..664fc1e --- /dev/null +++ b/src/support/support_bindings.cpp @@ -0,0 +1,50 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include "support_bindings.h" +#include "mo_lib_loopindices.hpp" +#include "mo_math_utilities.hpp" + + +// mo_loop_indices.F90 +// C wrappers for C++ functionality +void get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out){ + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, + i_endblk, i_startidx_out, i_endidx_out, false); +} +void get_indices_e_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out){ + get_indices_e_lib(i_startidx_in, i_endidx_in, nproma,i_blk, i_startblk, i_endblk, + i_startidx_out, i_endidx_out, false); +} + +void get_indices_v_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out){ + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, + i_startidx_out,i_endidx_out, false); +} + +void tdma_solver_vec_dp(const double* a, const double* b, const double* c, const double* d, + int slev, int elev, int startidx, int endidx, + int nrows, int ncols, double* varout){ + + tdma_solver_vec<double>(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout); + +} + +void tdma_solver_vec_sp(const float* a, const float* b, const float* c, const float* d, + int slev, int elev, int startidx, int endidx, + int nrows, int ncols, float* varout){ + + tdma_solver_vec<float>(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout); + +} diff --git a/src/support/support_bindings.h b/src/support/support_bindings.h new file mode 100644 index 0000000..df452e4 --- /dev/null +++ b/src/support/support_bindings.h @@ -0,0 +1,33 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- +#pragma once + +extern "C" { + // mo_loop_indices.F90 + void get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out); + + void get_indices_e_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out); + + void get_indices_v_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out); + + //mo_math_utilities.F90 + void tdma_solver_vec_dp(const double* a, const double* b, const double* c, const double* d, + int slev, int elev, int startidx, int endidx, + int nrows, int ncols, double* varout); + + void tdma_solver_vec_sp(const float* a, const float* b, const float* c, const float* d, + int slev, int elev, int startidx, int endidx, + int nrows, int ncols, float* varout); + +} diff --git a/test/c/test_interpolation_vector.cpp b/test/c/test_interpolation_vector.cpp index a33673c..0eb5a8d 100644 --- a/test/c/test_interpolation_vector.cpp +++ b/test/c/test_interpolation_vector.cpp @@ -75,12 +75,6 @@ TEST(Edges2CellsTest, DPTest) { // Check that for each computed cell in p_u_out and p_v_out, the value is 6. // This is because for each cell, the kernel adds 6 terms of 1*1. - p_u_ref[0] = 0.0; - p_u_ref[8] = 0.0; - p_u_ref[10] = 0.0; - p_v_ref[0] = 0.0; - p_v_ref[8] = 0.0; - p_v_ref[10] = 0.0; for (size_t idx = 0; idx < p_u_out.size(); ++idx) { EXPECT_NEAR(p_u_out[idx], p_u_ref[idx], 1e-12); EXPECT_NEAR(p_v_out[idx], p_v_ref[idx], 1e-12); @@ -120,12 +114,6 @@ TEST(Edges2CellsTest, SPTest) { nproma, nlev, nblks_e, nblks_c); - p_u_ref[0] = 0.0f; - p_u_ref[8] = 0.0f; - p_u_ref[10] = 0.0f; - p_v_ref[0] = 0.0f; - p_v_ref[8] = 0.0f; - p_v_ref[10] = 0.0f; // Verify that every computed output equals 6. for (size_t idx = 0; idx < p_u_out.size(); ++idx) { EXPECT_NEAR(p_u_out[idx], p_u_ref[idx], 1e-5f); diff --git a/test/c/test_tdma_solver.cpp b/test/c/test_tdma_solver.cpp index 8f120ef..4e09ff3 100644 --- a/test/c/test_tdma_solver.cpp +++ b/test/c/test_tdma_solver.cpp @@ -51,7 +51,7 @@ protected: TEST_F(TDMASolverTestFixture, FullTest) { // Call the solver over the full range: - tdma_solver_vec_double(a.data(), b.data(), c.data(), d.data(), + tdma_solver_vec<double>(a.data(), b.data(), c.data(), d.data(), 0, n, 0, n, n, n, x.data()); // Compute the sum of all elements in the output matrix. @@ -71,7 +71,7 @@ TEST_F(TDMASolverTestFixture, FullTest) { TEST_F(TDMASolverTestFixture, PartialTest) { // Call the solver for a partial region: // For C++: slev = 1, elev = n-1, startidx = 1, endidx = n-1. - tdma_solver_vec_double(a.data(), b.data(), c.data(), d.data(), + tdma_solver_vec<double>(a.data(), b.data(), c.data(), d.data(), 1, n - 1, 1, n - 1, n, n, x.data()); // Compute the sum over a region -- GitLab From 7620931e152dbb3a161df7212484654a52845b0e Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: Tue, 25 Feb 2025 21:46:48 +0000 Subject: [PATCH 37/50] Draft: cpp version of mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib (icon-libraries/libiconmath!34) ## What is the new feature cpp version of mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib ## How is it implemented Kept separate file from other `mo_lib_intp_rbf` routines to avoid merge conflicts. Will be resolved by Ali and Dylan later. Co-authored-by: Pradipta Samanta <samanta@dkrz.de> Merged-by: Pradipta Samanta <samanta@dkrz.de> Changelog: feature --- src/interpolation/CMakeLists.txt | 5 +- ...b_intp_rbf-rbf_vec_interpol_vertex_lib.cpp | 197 ++++++++++++++++++ ...b_intp_rbf-rbf_vec_interpol_vertex_lib.hpp | 32 +++ ...f-rbf_vec_interpol_vertex_lib_bindings.cpp | 134 ++++++++++++ ...rbf-rbf_vec_interpol_vertex_lib_bindings.h | 54 +++++ test/c/CMakeLists.txt | 1 + test/c/test_intp_rbf.cpp | 126 +++++++++++ 7 files changed, 548 insertions(+), 1 deletion(-) create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h create mode 100644 test/c/test_intp_rbf.cpp diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index f982f3b..37c3ad0 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -14,7 +14,10 @@ add_library( mo_lib_interpolation_scalar.F90 mo_lib_interpolation_vector.F90 mo_lib_interpolation_vector.cpp - mo_lib_intp_rbf.F90) + mo_lib_intp_rbf.F90 + mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp + mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp +) add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation) diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp new file mode 100644 index 0000000..c9b776e --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp @@ -0,0 +1,197 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +/// Contains the only mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib() +/// +/// Separate to avoid conflicts with Ali working on rest of mo_lib_intp_rbf + +#include <type_traits> +#include <Kokkos_Core.hpp> +#include "mo_lib_loopindices.hpp" +#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" + + +constexpr int rbf_vec_dim_v = 6; + +//------------------------------------------------------------------------- +// +// +//> +/// Performs vector RBF reconstruction at triangle vertices. +/// +/// Theory described in Narcowich and Ward (Math Comp. 1994) and +/// Bonaventura and Baudisch (Mox Report n. 75). +/// It takes edge based variables as input and combines them +/// into three dimensional cartesian vectors at each vertex. +/// +/// Two templated variables in order to support mixed precision. +/// Intended that type_traits::is_floating_point(T,S)==TRUE +/// precision(T) >= precision(S) +template <typename T, typename S> +void rbf_vec_interpol_vertex_lib( + const T* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const T* rbf_vec_coeff_v, + S* p_u_out, + S* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + // Dimensions for the arrays. + const int nlev, const int nblks_e, const int nblks_v + ) +{ + /* +#ifdef DIM_ENABLE_GPU + if (lacc){ using MemSpace = Kokkos::CudaSpace; + } else { using MemSpace = Kokkos::HostSpace; } +#else + using MemSpace = Kokkos::HostSpace; +#endif + + */ + + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<const T****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D; + + + + // input components of velocity or horizontal vorticity vectors at edge midpoints + // dim: (nproma,nlev,nblks_e) + UnmanagedConstT3D p_e_in_view(p_e_in, nproma, nlev, nblks_e); + + // index array defining the stencil of surrounding edges for vector rbf interpolation at each triangle vertex + // (rbf_vec_dim_v,nproma,nblks_v) + UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v); + UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v); + + // coefficients are working precision array containing the coefficients used for vector rbf interpolation + // at each tringle vertex (input is normal component), + // dim: (rbf_vec_dim_v,2,nproma,nblks_v) + UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v); + + // reconstructed x-component (u) of velocity vector, + // dim: (nproma,nlev,nblks_v) + UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v); + // reconstructed y-component (v) of velocity vector, + // dim: (nproma,nlev,nblks_v) + UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v); + + // Local vars + //int jv, jk, jb; // integer over vertices, levels, and blocks, + int jb; // integer over vertices, levels, and blocks, + int i_startidx; // start index + int i_endidx; // end index + + for (jb=i_startblk; jb <= i_endblk; ++jb){ + + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, + i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for("rbf_vec_interpol_vertex_lib", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jv) { + + // NOTE: Static indexes reduced by 1 from Fortran version + p_u_out_view(jv, jk, jb) = + ptr_coeff_view(0, 0, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + + ptr_coeff_view(1, 0, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + + ptr_coeff_view(2, 0, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + + ptr_coeff_view(3, 0, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + + ptr_coeff_view(4, 0, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + + ptr_coeff_view(5, 0, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); + p_v_out_view(jv, jk, jb) = + ptr_coeff_view(0, 1, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + + ptr_coeff_view(1, 1, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + + ptr_coeff_view(2, 1, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + + ptr_coeff_view(3, 1, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + + ptr_coeff_view(4, 1, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + + ptr_coeff_view(5, 1, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); + } + ); + } +} + +// Explicit instantiation - double precision +template +void rbf_vec_interpol_vertex_lib<double, double>( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + double* p_u_out, + double* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + +// Explicit instantiation - single precision +template +void rbf_vec_interpol_vertex_lib<float, float>( + const float* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const float* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + +// Explicit instantiation - mixed precision +template +void rbf_vec_interpol_vertex_lib<double, float>( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp new file mode 100644 index 0000000..c0b6f05 --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp @@ -0,0 +1,32 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#pragma once + +template <typename T, typename S> +void rbf_vec_interpol_vertex_lib( + const T* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const T* rbf_vec_coeff_v, + S* p_u_out, + S* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_c + ); \ No newline at end of file diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp new file mode 100644 index 0000000..06dc467 --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp @@ -0,0 +1,134 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h" +#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" + +void rbf_vec_interpol_vertex_lib_dp( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + double* p_u_out, + double* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ) +{ + rbf_vec_interpol_vertex_lib<double, double>( + p_e_in, + rbf_vec_idx_v, + rbf_vec_blk_v, + rbf_vec_coeff_v, + p_u_out, + p_v_out, + i_startblk, // start_block needed for get_indices_c_lib + i_endblk, // end_block needed for get_indices_c_lib + i_startidx_in, // start_index needed for get_indices_c_lib + i_endidx_in, // end_index needed for get_indices_c_lib + slev, // vertical start level + elev, // vertical end level + nproma, // inner loop length/vector length + lacc, // if true, use Cuda mem-/exec-spaces + acc_async, // [deprecated] use async acc + nlev, nblks_e, nblks_v + ); +} + + +void rbf_vec_interpol_vertex_lib_sp( + const float* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const float* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ) +{ + rbf_vec_interpol_vertex_lib<float, float>( + p_e_in, + rbf_vec_idx_v, + rbf_vec_blk_v, + rbf_vec_coeff_v, + p_u_out, + p_v_out, + i_startblk, // start_block needed for get_indices_c_lib + i_endblk, // end_block needed for get_indices_c_lib + i_startidx_in, // start_index needed for get_indices_c_lib + i_endidx_in, // end_index needed for get_indices_c_lib + slev, // vertical start level + elev, // vertical end level + nproma, // inner loop length/vector length + lacc, // if true, use Cuda mem-/exec-spaces + acc_async, // [deprecated] use async acc + nlev, nblks_e, nblks_v + ); + +} + +void rbf_vec_interpol_vertex_lib_mixprec( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ) +{ + rbf_vec_interpol_vertex_lib<double, float>( + p_e_in, + rbf_vec_idx_v, + rbf_vec_blk_v, + rbf_vec_coeff_v, + p_u_out, + p_v_out, + i_startblk, // start_block needed for get_indices_c_lib + i_endblk, // end_block needed for get_indices_c_lib + i_startidx_in, // start_index needed for get_indices_c_lib + i_endidx_in, // end_index needed for get_indices_c_lib + slev, // vertical start level + elev, // vertical end level + nproma, // inner loop length/vector length + lacc, // if true, use Cuda mem-/exec-spaces + acc_async, // [deprecated] use async acc + nlev, nblks_e, nblks_v + ); + +} + diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h new file mode 100644 index 0000000..4356f88 --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h @@ -0,0 +1,54 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#pragma once + +extern "C" { + +void rbf_vec_interpol_vertex_lib_dp( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + double* p_u_out, + double* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + +void rbf_vec_interpol_vertex_lib_sp( + const float* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const float* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + +} \ No newline at end of file diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index 95ca08b..13c5dfe 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -25,6 +25,7 @@ set(SOURCES main.cpp test_tdma_solver.cpp test_interpolation_vector.cpp + test_intp_rbf.cpp ) # Create the test executable from your test files, including main.cpp. add_executable(iconmath_test_c ${SOURCES}) diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp new file mode 100644 index 0000000..0aa4f9b --- /dev/null +++ b/test/c/test_intp_rbf.cpp @@ -0,0 +1,126 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include <gtest/gtest.h> +#include <Kokkos_Core.hpp> +#include <vector> +#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" + +// Free-function helpers for 3D and 4D array sizes (assumed column-major) +template<typename T> +size_t num_elements_3d(int d1, int d2, int d3) { + return static_cast<size_t>(d1) * d2 * d3; +} + +template<typename T> +size_t num_elements_4d(int d1, int d2, int d3, int d4) { + return static_cast<size_t>(d1) * d2 * d3 * d4; +} + +// Define a helper struct that holds the two types. +template<typename InT, typename OutT> +struct MixedPrecision { + using in_type = InT; + using out_type = OutT; +}; + +// Define the list of type pairs we want to test. +typedef ::testing::Types< MixedPrecision<double, double>, + MixedPrecision<double, float>, + MixedPrecision<float, float> > MixedTypes; + +// Define a typed test fixture. +template <typename TypePair> +class RbfVecInterpolVertexMixedTestFixture : public ::testing::Test { +public: + using InType = typename TypePair::in_type; + using OutType = typename TypePair::out_type; + + // Constant dimensions. + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 4; // number of vertical levels + static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) + static constexpr int nblks_v = 2; // number of vertex blocks (for rbf arrays and outputs) + static constexpr int rbf_vec_dim = 6; // fixed dimension for rbf vector (stencil points) + + // Parameter values. + int i_startblk = 0; + int i_endblk = 1; // Test blocks [0, 1] + int i_startidx_in = 0; + int i_endidx_in = nproma - 1; // Full range: 0 .. nproma-1 + int slev = 0; + int elev = nlev - 1; // Full vertical range (0 .. nlev-1) + bool lacc = false; // Not using ACC-specific behavior. + bool acc_async = false; // No asynchronous execution. + + // Arrays stored in std::vector. + std::vector<InType> p_e_in; // Dimensions: (nproma, nlev, nblks_e) + std::vector<int> rbf_vec_idx_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) + std::vector<int> rbf_vec_blk_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) + std::vector<InType> rbf_vec_coeff_v; // Dimensions: (rbf_vec_dim, 2, nproma, nblks_v) + std::vector<OutType> p_u_out; // Dimensions: (nproma, nlev, nblks_v) + std::vector<OutType> p_v_out; // Dimensions: (nproma, nlev, nblks_v) + + RbfVecInterpolVertexMixedTestFixture() { + // Allocate and initialize inputs. + p_e_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1)); + rbf_vec_idx_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 1); + rbf_vec_blk_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 0); + rbf_vec_coeff_v.resize(num_elements_4d<InType>(rbf_vec_dim, 2, nproma, nblks_v), static_cast<InType>(1)); + + // Allocate output arrays and initialize to zero. + p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); + p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); + } +}; + +TYPED_TEST_SUITE(RbfVecInterpolVertexMixedTestFixture, MixedTypes); + +TYPED_TEST(RbfVecInterpolVertexMixedTestFixture, BasicTest) { + using InType = typename TestFixture::InType; + using OutType = typename TestFixture::OutType; + + // Call the function with mixed precision. + rbf_vec_interpol_vertex_lib<InType, OutType>( + this->p_e_in.data(), + this->rbf_vec_idx_v.data(), + this->rbf_vec_blk_v.data(), + this->rbf_vec_coeff_v.data(), + this->p_u_out.data(), + this->p_v_out.data(), + this->i_startblk, + this->i_endblk, + this->i_startidx_in, + this->i_endidx_in, + this->slev, + this->elev, + this->nproma, + this->lacc, + this->acc_async, + this->nlev, + RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_e, + RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_v); + + // Check the outputs only for blocks in the range [i_startblk, i_endblk]. + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = 0; level < this->nlev; ++level) { + for (int i = 0; i < this->nproma; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 6 stencil points, expect 6. + EXPECT_NEAR(this->p_u_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + EXPECT_NEAR(this->p_v_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + } + } + } +} -- GitLab From 84f5c0904765c3f573e847519064fde4386f7274 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante0.lvt.dkrz.de> Date: Tue, 25 Feb 2025 16:03:59 +0100 Subject: [PATCH 38/50] ported mo_lib_laplace --- src/horizontal/CMakeLists.txt | 4 +- src/horizontal/mo_lib_laplace.cpp | 104 +++++++++++++++++++++ src/horizontal/mo_lib_laplace.hpp | 24 +++++ src/horizontal/mo_lib_laplace_bindings.cpp | 50 ++++++++++ src/horizontal/mo_lib_laplace_bindings.h | 32 +++++++ 5 files changed, 213 insertions(+), 1 deletion(-) create mode 100644 src/horizontal/mo_lib_laplace.cpp create mode 100644 src/horizontal/mo_lib_laplace.hpp create mode 100644 src/horizontal/mo_lib_laplace_bindings.cpp create mode 100644 src/horizontal/mo_lib_laplace_bindings.h diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt index 078a14d..44f9e44 100644 --- a/src/horizontal/CMakeLists.txt +++ b/src/horizontal/CMakeLists.txt @@ -13,7 +13,9 @@ add_library( iconmath-horizontal mo_lib_divrot.F90 mo_lib_laplace.F90 - mo_lib_gradients.F90) + mo_lib_gradients.F90 + mo_lib_laplace_bindings.cpp + mo_lib_laplace.cpp) add_library(${PROJECT_NAME}::horizontal ALIAS iconmath-horizontal) diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp new file mode 100644 index 0000000..48c4479 --- /dev/null +++ b/src/horizontal/mo_lib_laplace.cpp @@ -0,0 +1,104 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include "mo_lib_laplace.hpp" +//#include "mo_lib_gradients.hpp" +#include <Kokkos_Core.hpp> +#include <iostream> + +template<typename T> +void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const int* edge_cell_idx, const int* edge_cell_blk, const T* inv_dual_edge_length, + const int* cell_edge_idx, const int* cell_edge_blk, + const T* geofac_n2s, const T* geofac_div, T* nabla2_psi_c, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, + int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){ + + typedef Kokkos::View<const T**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT2D; + typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + + + UnmanagedConstT3D psi_c_view(psi_c, nproma,nlev,nblks_c); + UnmanagedConstInt3D cell_neighbor_idx_view(cell_neighbor_idx, nproma,nblks_c,3); + UnmanagedConstInt3D cell_neighbor_blk_view(cell_neighbor_blk, nproma,nblks_c,3); + UnmanagedConstInt3D edge_cell_idx_view(edge_cell_idx, nproma,nblks_e,2); + UnmanagedConstInt3D edge_cell_blk_view(edge_cell_blk, nproma,nblks_e,2); + UnmanagedConstT2D inv_dual_edge_length_view(inv_dual_edge_length, nproma,nblks_e); + UnmanagedConstInt3D cell_edge_idx_view(cell_edge_idx, nproma,nblks_c,3); + UnmanagedConstInt3D cell_edge_blk_view(cell_edge_blk, nproma,nblks_c,3); + UnmanagedConstT3D geofac_n2s_view(geofac_n2s, nproma,cell_type+1,nblks_c); + UnmanagedConstT3D geofac_div_view(geofac_div, nproma,cell_type,nblks_c); + UnmanagedT3D nabla2_psi_c_view(nabla2_psi_c, nproma,nlev,nblks_c); + + + switch (cell_type){ + + case 3: + for (int jb = i_startblk; jb < i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); + + + Kokkos::parallel_for("rbf_interpol_c2grad", + Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}), + KOKKOS_LAMBDA(const int jk, const int jc){ + + nabla2_psi_c_view(jc, jk, jb) = + psi_c_view(jc, jk, jb)*geofac_n2s_view(jc, 1, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * + geofac_n2s_view(jc, 2, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * + geofac_n2s_view(jc, 3, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 3), jk, cell_neighbor_blk_view(jc, jb, 3)) * + geofac_n2s_view(jc, 4, jb); + }); + } + break; + + + case 6: +/* TODO + grad_fd_norm_lib(psi_c, edge_cell_idx, edge_cell_blk, + inv_dual_edge_length, z_grad_fd_norm_e,& + i_startblk_e, i_endblk_e, i_startidx_e, i_endidx_e, + slev, elev, nproma); + + div_lib(z_grad_fd_norm_e, cell_edge_idx, cell_edge_blk, + geofac_div, nabla2_psi_c, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, slev, elev, nproma); +*/ + default: + std::cout << "Unknown value for cell_type\n"; + + }//switch +}//void + +template +void nabla2_scalar_lib<double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length, + const int* cell_edge_idx, const int* cell_edge_blk, + const double* geofac_n2s, const double* geofac_div, double* nabla2_psi_c, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, + int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); + +template +void nabla2_scalar_lib<float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length, + const int* cell_edge_idx, const int* cell_edge_blk, + const float* geofac_n2s, const float* geofac_div, float* nabla2_psi_c, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, + int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); diff --git a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp new file mode 100644 index 0000000..2a6663c --- /dev/null +++ b/src/horizontal/mo_lib_laplace.hpp @@ -0,0 +1,24 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- +#pragma once + +#include "mo_lib_loopindices.hpp" +#include <Kokkos_Core.hpp> +#include <vector> + +template<typename T> +void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const int* dge_cell_idx, const int* edge_cell_blk, const T* inv_dual_edge_length, + const int* cell_edge_idx, const int* cell_edge_blk, + const T* geofac_n2s, const T* geofac_div, const T* nabla2_psi_c, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, + int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); diff --git a/src/horizontal/mo_lib_laplace_bindings.cpp b/src/horizontal/mo_lib_laplace_bindings.cpp new file mode 100644 index 0000000..43a19b3 --- /dev/null +++ b/src/horizontal/mo_lib_laplace_bindings.cpp @@ -0,0 +1,50 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + + +#include "mo_lib_laplace_bindings.h" +#include "mo_lib_laplace.hpp" + + +void nabla2_scalar_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length, + const int* cell_edge_idx, const int* cell_edge_blk, + const double* geofac_n2s, const double* geofac_div, const double* nabla2_psi_c, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, + int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){ + + nabla2_scalar_lib<double>(psi_c, cell_neighbor_idx, cell_neighbor_blk, + edge_cell_idx, edge_cell_blk, inv_dual_edge_length, + cell_edge_idx, cell_edge_blk, + geofac_n2s, geofac_div, nabla2_psi_c, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, + i_startblk_e, i_endblk_e, i_startidx_e, i_endidx_e, + nlev, slev, elev, nproma, nblks_e, nblks_c, cell_type, lacc); +} + + +void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length, + const int* cell_edge_idx, const int* cell_edge_blk, + const float* geofac_n2s, const float* geofac_div, const float* nabla2_psi_c, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, + int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){ + + nabla2_scalar_lib<float>(psi_c, cell_neighbor_idx, cell_neighbor_blk, + edge_cell_idx, edge_cell_blk, inv_dual_edge_length, + cell_edge_idx, cell_edge_blk, + geofac_n2s, geofac_div, nabla2_psi_c, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, + i_startblk_e, i_endblk_e, i_startidx_e, i_endidx_e, + nlev, slev, elev, nproma, nblks_e, nblks_c, cell_type, lacc); +} diff --git a/src/horizontal/mo_lib_laplace_bindings.h b/src/horizontal/mo_lib_laplace_bindings.h new file mode 100644 index 0000000..96bdc64 --- /dev/null +++ b/src/horizontal/mo_lib_laplace_bindings.h @@ -0,0 +1,32 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- +#pragma once + + + +extern "C"{ + +void nabla2_scalar_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length, + const int* cell_edge_idx, const int* cell_edge_blk, + const double* geofac_n2s, const double* geofac_div, const double* nabla2_psi_c, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, + int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); + +void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length, + const int* cell_edge_idx, const int* cell_edge_blk, + const float* geofac_n2s, const float* geofac_div, const float* nabla2_psi_c, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, + int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); +} -- GitLab From 3a150bdafae39401173346a89020a56767920124 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante0.lvt.dkrz.de> Date: Tue, 25 Feb 2025 16:28:59 +0100 Subject: [PATCH 39/50] Fixed index for nabla2_scalar_lib --- src/horizontal/mo_lib_laplace.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp index 48c4479..203b3b7 100644 --- a/src/horizontal/mo_lib_laplace.cpp +++ b/src/horizontal/mo_lib_laplace.cpp @@ -56,13 +56,13 @@ void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* KOKKOS_LAMBDA(const int jk, const int jc){ nabla2_psi_c_view(jc, jk, jb) = - psi_c_view(jc, jk, jb)*geofac_n2s_view(jc, 1, jb) + + psi_c_view(jc, jk, jb)*geofac_n2s_view(jc, 0, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * + geofac_n2s_view(jc, 1, jb) + psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * geofac_n2s_view(jc, 2, jb) + psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * - geofac_n2s_view(jc, 3, jb) + - psi_c_view(cell_neighbor_idx_view(jc, jb, 3), jk, cell_neighbor_blk_view(jc, jb, 3)) * - geofac_n2s_view(jc, 4, jb); + geofac_n2s_view(jc, 3, jb); }); } break; -- GitLab From 79ce81cea6b785c2b8c4185cd8a3414fb5c81fbd Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante4.lvt.dkrz.de> Date: Wed, 26 Feb 2025 10:04:06 +0100 Subject: [PATCH 40/50] WIP: MR seperation. --- src/horizontal/mo_lib_laplace.cpp | 118 ++++++++++++++++++++++++++++++ src/horizontal/mo_lib_laplace.hpp | 7 ++ 2 files changed, 125 insertions(+) diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp index 203b3b7..57ad292 100644 --- a/src/horizontal/mo_lib_laplace.cpp +++ b/src/horizontal/mo_lib_laplace.cpp @@ -11,8 +11,10 @@ #include "mo_lib_laplace.hpp" //#include "mo_lib_gradients.hpp" +//#inlcude "mo_fortran_tools.hpp" #include <Kokkos_Core.hpp> #include <iostream> +#include <utility> template<typename T> void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, @@ -102,3 +104,119 @@ void nabla2_scalar_lib<float>(const float* psi_c, const int* cell_neighbor_idx, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); + + +//--------------------------------nabla2_scalar_avg_lib------------------------------------- + + +template<typename T> +void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const T* geofac_n2s, const T* avg_coeff, T* nabla2_psi_c, + int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in, + int nblks_c, int cell_type, int patch_id, + int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc){ + + typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + + UnmanagedConstT3D psi_c_view(psi_c, nproma,nlev,nblks_c); + UnmanagedConstInt3D cell_neighbor_idx_view(cell_neighbor_idx, nproma,nblks_c,3); + UnmanagedConstInt3D cell_neighbor_blk_view(cell_neighbor_blk, nproma,nblks_c,3); + UnmanagedConstT3D geofac_n2s_view(geofac_n2s, nproma,cell_type+1,nblks_c); + UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma,nlev,nblks_c); + UnmanagedT3D nabla2_psi_c_view(nabla2_psi_c, nproma,nlev,nblks_c); + + int aux_c + UnmanagedT3D aux_c_view(aux_c, nproma, nlev, nblks_c); //local + + + + switch (cell_type){ + + case 3: + + if(slev == elev){ + + jk = slev; + i_startblk = i_startblk_in[1]; + i_endblk = i_endblk_in[1]; + + for(int jb = i_startblk; jb < i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::parallel_for("aux_c", + Kokkos::RangePolicy<int>(i_startidx, i_endidx), + KOKKOS_LAMBDA(const int jc) { + + aux_c_view(jc, jk, jb) = + psi_c_view(jc, jk, jb) * geofac_n2s_view(jc, 0, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * + geofac_n2s_view(jc, 1, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * + geofac_n2s_view(jc, 2, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * + geofac_n2s_view(jc, 3, jb); + + }); + } + + if (l_limited_area || patch_id > 1){ + + i_startblk = i_startblk_in[2]; + i_endblk = i_endblk_in[2]; + + /*TODO + gradient( Kokkos::subview(aux_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), + Kokkos::subview(nabla2_psi_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), lzacc); + */ + } + + i_startblk = i_startblk_in[3]; + i_endblk = i_endblk_in[3]; + + for(int jb = i_startblk; jb < i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::parallel_for("DivGrad", + Kokkos::RangePolicy<int>(i_startidx, i_endidx), + KOKKOS_LAMBDA(const int jc) { + + nabla2_psi_c_view(jc, jk, jb) = + aux_c_view(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + + aux_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * + avg_coeff_view(jc, 1, jb) + + aux_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * + avg_coeff_view(jc, 2, jb) + + aux_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * + avg_coeff_view(jc, 3, jb); + + }); + } + }//if + break; + + default: + std::cout << "Default case.\n"; + +}//switch +}//void + + +template +void nabla2_scalar_avg_lib<double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const double* geofac_n2s, const double* avg_coeff, double* nabla2_psi_c, + int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in, + int nblks_c, int cell_type, int patch_id, + int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); + +template +void nabla2_scalar_avg_lib<float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const float* geofac_n2s, const float* avg_coeff, float* nabla2_psi_c, + int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in, + int nblks_c, int cell_type, int patch_id, + int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); diff --git a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp index 2a6663c..0a67f85 100644 --- a/src/horizontal/mo_lib_laplace.hpp +++ b/src/horizontal/mo_lib_laplace.hpp @@ -22,3 +22,10 @@ void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); +template<typename T> +void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const T* geofac_n2s, const T* avg_coeff, T* nabla2_psi_c, + int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in, + int nblks_c, int cell_type, int patch_id, + int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); + -- GitLab From 595f8e4f9c136a3dadcf6f319ed1db33b9680822 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante2.lvt.dkrz.de> Date: Wed, 26 Feb 2025 15:02:02 +0100 Subject: [PATCH 41/50] Completed mo_lib_laplace --- src/horizontal/mo_lib_laplace.cpp | 145 ++++++++++++++------- src/horizontal/mo_lib_laplace.hpp | 2 +- src/horizontal/mo_lib_laplace_bindings.cpp | 4 +- src/horizontal/mo_lib_laplace_bindings.h | 4 +- 4 files changed, 102 insertions(+), 53 deletions(-) diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp index 57ad292..3ed2102 100644 --- a/src/horizontal/mo_lib_laplace.cpp +++ b/src/horizontal/mo_lib_laplace.cpp @@ -15,6 +15,7 @@ #include <Kokkos_Core.hpp> #include <iostream> #include <utility> +#include <memory> template<typename T> void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, @@ -112,7 +113,7 @@ void nabla2_scalar_lib<float>(const float* psi_c, const int* cell_neighbor_idx, template<typename T> void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, const T* geofac_n2s, const T* avg_coeff, T* nabla2_psi_c, - int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in, + int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3], int nblks_c, int cell_type, int patch_id, int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc){ @@ -127,10 +128,7 @@ void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const i UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma,nlev,nblks_c); UnmanagedT3D nabla2_psi_c_view(nabla2_psi_c, nproma,nlev,nblks_c); - int aux_c - UnmanagedT3D aux_c_view(aux_c, nproma, nlev, nblks_c); //local - - + Kokkos::View<int***, Kokkos::LayoutLeft> aux_c_view("aux_c_view", nproma, nlev, nblks_c); switch (cell_type){ @@ -138,71 +136,122 @@ void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const i if(slev == elev){ - jk = slev; - i_startblk = i_startblk_in[1]; - i_endblk = i_endblk_in[1]; + int jk = slev; + int i_startblk = i_startblk_in[1]; + int i_endblk = i_endblk_in[1]; for(int jb = i_startblk; jb < i_endblk; ++jb) { int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::parallel_for("aux_c", Kokkos::RangePolicy<int>(i_startidx, i_endidx), KOKKOS_LAMBDA(const int jc) { aux_c_view(jc, jk, jb) = - psi_c_view(jc, jk, jb) * geofac_n2s_view(jc, 0, jb) + - psi_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * - geofac_n2s_view(jc, 1, jb) + - psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * - geofac_n2s_view(jc, 2, jb) + - psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * - geofac_n2s_view(jc, 3, jb); - + psi_c_view(jc, jk, jb) * geofac_n2s_view(jc, 0, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * + geofac_n2s_view(jc, 1, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * + geofac_n2s_view(jc, 2, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * + geofac_n2s_view(jc, 3, jb); }); } - if (l_limited_area || patch_id > 1){ + if (l_limited_area || patch_id > 1){ - i_startblk = i_startblk_in[2]; - i_endblk = i_endblk_in[2]; + i_startblk = i_startblk_in[2]; + i_endblk = i_endblk_in[2]; - /*TODO - gradient( Kokkos::subview(aux_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), - Kokkos::subview(nabla2_psi_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), lzacc); - */ + /*TODO + copy(Kokkos::subview(aux_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), + Kokkos::subview(nabla2_psi_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), lzacc); + */ + } + + i_startblk = i_startblk_in[3]; + i_endblk = i_endblk_in[3]; + + for(int jb = i_startblk; jb < i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[3], i_endidx_in[3], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::parallel_for("DivGrad", + Kokkos::RangePolicy<int>(i_startidx, i_endidx), + KOKKOS_LAMBDA(const int jc) { + + nabla2_psi_c_view(jc, jk, jb) = + aux_c_view(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + + aux_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * + avg_coeff_view(jc, 1, jb) + + aux_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * + avg_coeff_view(jc, 2, jb) + + aux_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * + avg_coeff_view(jc, 3, jb); + + }); } + }else{ - i_startblk = i_startblk_in[3]; - i_endblk = i_endblk_in[3]; + int i_startblk = i_startblk_in[1]; + int i_endblk = i_endblk_in[1]; - for(int jb = i_startblk; jb < i_endblk; ++jb) { + for(int jb = i_startblk; jb < i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::parallel_for("rbf_interpol_c2grad", + Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}), + KOKKOS_LAMBDA(const int jk, const int jc){ + aux_c_view(jc, jk, jb) = + psi_c_view(jc, jk, jb) * geofac_n2s_view(jc, 0, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * + geofac_n2s_view(jc, 1, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * + geofac_n2s_view(jc, 2, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * + geofac_n2s_view(jc, 3, jb); + }); + } - Kokkos::parallel_for("DivGrad", - Kokkos::RangePolicy<int>(i_startidx, i_endidx), - KOKKOS_LAMBDA(const int jc) { - nabla2_psi_c_view(jc, jk, jb) = - aux_c_view(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + - aux_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * - avg_coeff_view(jc, 1, jb) + - aux_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * - avg_coeff_view(jc, 2, jb) + - aux_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * - avg_coeff_view(jc, 3, jb); + if (l_limited_area || patch_id > 1){ - }); - } - }//if - break; + i_startblk = i_startblk_in[2]; + i_endblk = i_endblk_in[2]; - default: - std::cout << "Default case.\n"; + /* + copy(Kokkos::subview(aux_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), + Kokkos::subview(nabla2_psi_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), lzacc); + */ + } + i_startblk = i_startblk_in[3]; + i_endblk = i_endblk_in[3]; + + for(int jb = i_startblk; jb < i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[3], i_endidx_in[3], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::parallel_for("rbf_interpol_c2grad", + Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}), + KOKKOS_LAMBDA(const int jk, const int jc){ + nabla2_psi_c_view(jc, jk, jb) = + psi_c_view(jc, jk, jb) * geofac_n2s_view(jc, 0, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * + geofac_n2s_view(jc, 1, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * + geofac_n2s_view(jc, 2, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * + geofac_n2s_view(jc, 3, jb); + }); + } + }//if }//switch }//void @@ -210,13 +259,13 @@ void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const i template void nabla2_scalar_avg_lib<double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, const double* geofac_n2s, const double* avg_coeff, double* nabla2_psi_c, - int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in, + int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3], int nblks_c, int cell_type, int patch_id, int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); template void nabla2_scalar_avg_lib<float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, const float* geofac_n2s, const float* avg_coeff, float* nabla2_psi_c, - int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in, + int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3], int nblks_c, int cell_type, int patch_id, int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); diff --git a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp index 0a67f85..3aa905f 100644 --- a/src/horizontal/mo_lib_laplace.hpp +++ b/src/horizontal/mo_lib_laplace.hpp @@ -25,7 +25,7 @@ void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* template<typename T> void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, const T* geofac_n2s, const T* avg_coeff, T* nabla2_psi_c, - int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in, + int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3], int nblks_c, int cell_type, int patch_id, int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); diff --git a/src/horizontal/mo_lib_laplace_bindings.cpp b/src/horizontal/mo_lib_laplace_bindings.cpp index 43a19b3..73e5aa7 100644 --- a/src/horizontal/mo_lib_laplace_bindings.cpp +++ b/src/horizontal/mo_lib_laplace_bindings.cpp @@ -36,7 +36,7 @@ void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, cons const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length, const int* cell_edge_idx, const int* cell_edge_blk, const float* geofac_n2s, const float* geofac_div, const float* nabla2_psi_c, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int i_startblk[3], int i_endblk[3], int i_startidx_in[3], int i_endidx_in[3], int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){ @@ -44,7 +44,7 @@ void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, cons edge_cell_idx, edge_cell_blk, inv_dual_edge_length, cell_edge_idx, cell_edge_blk, geofac_n2s, geofac_div, nabla2_psi_c, - i_startblk, i_endblk, i_startidx_in, i_endidx_in, + i_startblk[3], i_endblk[3], i_startidx_in[3], i_endidx_in[3], i_startblk_e, i_endblk_e, i_startidx_e, i_endidx_e, nlev, slev, elev, nproma, nblks_e, nblks_c, cell_type, lacc); } diff --git a/src/horizontal/mo_lib_laplace_bindings.h b/src/horizontal/mo_lib_laplace_bindings.h index 96bdc64..c0fe237 100644 --- a/src/horizontal/mo_lib_laplace_bindings.h +++ b/src/horizontal/mo_lib_laplace_bindings.h @@ -18,7 +18,7 @@ void nabla2_scalar_lib_dp(const double* psi_c, const int* cell_neighbor_idx, con const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length, const int* cell_edge_idx, const int* cell_edge_blk, const double* geofac_n2s, const double* geofac_div, const double* nabla2_psi_c, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int i_startblk[3], int i_endblk[3], int i_startidx_in[3], int i_endidx_in[3], int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); @@ -26,7 +26,7 @@ void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, cons const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length, const int* cell_edge_idx, const int* cell_edge_blk, const float* geofac_n2s, const float* geofac_div, const float* nabla2_psi_c, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int i_startblk[3], int i_endblk[3], int i_startidx_in[3], int i_endidx_in[3], int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); } -- GitLab From 1fba81333e3038e280c4067dd1fe5ec81d9cb94e Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante6.lvt.dkrz.de> Date: Thu, 27 Feb 2025 08:27:38 +0100 Subject: [PATCH 42/50] Ready for MR --- src/horizontal/mo_lib_laplace.cpp | 102 ++++++++++++++++++++- src/horizontal/mo_lib_laplace.hpp | 11 +++ src/horizontal/mo_lib_laplace_bindings.cpp | 74 ++++++++++++++- src/horizontal/mo_lib_laplace_bindings.h | 36 +++++++- 4 files changed, 217 insertions(+), 6 deletions(-) diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp index 3ed2102..361d752 100644 --- a/src/horizontal/mo_lib_laplace.cpp +++ b/src/horizontal/mo_lib_laplace.cpp @@ -12,6 +12,7 @@ #include "mo_lib_laplace.hpp" //#include "mo_lib_gradients.hpp" //#inlcude "mo_fortran_tools.hpp" +//#inlcude "mo_lib_divrot" #include <Kokkos_Core.hpp> #include <iostream> #include <utility> @@ -204,7 +205,7 @@ void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const i int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); - Kokkos::parallel_for("rbf_interpol_c2grad", + Kokkos::parallel_for("aux_c_view", Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}), KOKKOS_LAMBDA(const int jk, const int jc){ aux_c_view(jc, jk, jb) = @@ -238,7 +239,7 @@ void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const i int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in[3], i_endidx_in[3], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); - Kokkos::parallel_for("rbf_interpol_c2grad", + Kokkos::parallel_for("nabla2_psi_c_view", Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}), KOKKOS_LAMBDA(const int jk, const int jc){ nabla2_psi_c_view(jc, jk, jb) = @@ -269,3 +270,100 @@ void nabla2_scalar_avg_lib<float>(const float* psi_c, const int* cell_neighbor_i int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3], int nblks_c, int cell_type, int patch_id, int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); + + +//-----------------------------------------nabla2_vec_atmos_lib--------------------------------------- + +template<typename T> +void nabla2_vec_atmos_lib(const T* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx, + const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk, + const int* vert_edge_idx, const int* vert_edge_blk, const T* tangent_orientation, + const T* inv_primal_edge_length, const T* inv_dual_edge_length, const T* geofac_div, + const T* geofac_rot, T* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c, + int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c, + int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc){ + + typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + + typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<const T**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT2D; + + UnmanagedConstT3D vec_e_view(vec_e, nproma,nlev,nblks_e); + + UnmanagedConstInt3D edge_cell_idx_view(edge_cell_idx, nproma,nblks_e,2); + UnmanagedConstInt3D edge_cell_blk_view(edge_cell_blk, nproma,nblks_e,2); + UnmanagedConstInt3D edge_vertex_idx_view(edge_vertex_idx, nproma,nblks_e,4); + UnmanagedConstInt3D edge_vertex_blk_view(edge_vertex_blk, nproma,nblks_e,4); + + UnmanagedConstInt3D cell_edge_idx_view(cell_edge_idx, nproma,nblks_c,3); + UnmanagedConstInt3D cell_edge_blk_view(cell_edge_blk, nproma,nblks_c,3); + + UnmanagedConstInt3D vert_edge_idx_view(vert_edge_idx, nproma,nblks_v,6); + UnmanagedConstInt3D vert_edge_blk_view(vert_edge_blk, nproma,nblks_v,6); + + UnmanagedConstT2D tangent_orientation_view(tangent_orientation, nproma,nblks_e); + UnmanagedConstT2D inv_primal_edge_length_view(inv_primal_edge_length, nproma,nblks_e); + UnmanagedConstT2D inv_dual_edge_length_view(inv_dual_edge_length, nproma,nblks_e); + + UnmanagedConstT3D geofac_rot_view(geofac_rot, nproma,9-cell_type,nblks_v); + UnmanagedConstT3D geofac_div_view(geofac_div, nproma,cell_type,nblks_c); + + UnmanagedT3D nabla2_vec_e_view(nabla2_vec_e, nproma,nlev,nblks_e); + + Kokkos::View<int***, Kokkos::LayoutLeft> z_div_c_view("z_div_c", nproma, nlev, nblks_c); //Local + Kokkos::View<int***, Kokkos::LayoutLeft> z_rot_v_view("z_rot_v", nproma, nlev, nblks_v); //Local + + + /*TODO + div_lib(vec_e, cell_edge_idx, cell_edge_blk, geofac_div, z_div_c, i_startblk_c, + i_endblk_c, i_startidx_c, i_endidx_c, slev, elev, nproma); + + rot_vertex_atmos_lib(vec_e, vert_edge_idx, vert_edge_blk, geofac_rot, z_rot_v, + i_startblk_v, i_endblk_v, i_startidx_v, i_endidx_v, + slev, elev, nproma); + */ + + + for(int jb = i_startblk; jb < i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::parallel_for("nabla2_psi_c_view", + Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}), + KOKKOS_LAMBDA(const int jk, const int je){ + + nabla2_vec_e_view(je, jk, jb) = + tangent_orientation_view(je, jb) * + (z_rot_v_view(edge_vertex_idx_view(je, jb, 2), jk, edge_vertex_blk_view(je, jb, 2)) - z_rot_v_view(edge_vertex_idx_view(je, jb, 1), jk, edge_vertex_blk_view(je, jb, 1))) * + inv_primal_edge_length_view(je, jb) + + (z_div_c_view(cell_edge_idx_view(je, jb, 2), jk, cell_edge_blk_view(je, jb, 2)) - z_div_c_view(cell_edge_idx_view(je, jb, 1), jk, cell_edge_blk_view(je, jb, 1))) * + inv_dual_edge_length_view(je, jb); + + }); + } +} + + +template +void nabla2_vec_atmos_lib<double>(const double* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx, + const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk, + const int* vert_edge_idx, const int* vert_edge_blk, const double* tangent_orientation, + const double* inv_primal_edge_length, const double* inv_dual_edge_length, const double* geofac_div, + const double* geofac_rot, double* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c, + int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c, + int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc); + + +template +void nabla2_vec_atmos_lib<float>(const float* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx, + const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk, + const int* vert_edge_idx, const int* vert_edge_blk, const float* tangent_orientation, + const float* inv_primal_edge_length, const float* inv_dual_edge_length, const float* geofac_div, + const float* geofac_rot, float* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c, + int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c, + int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc); diff --git a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp index 3aa905f..5e89713 100644 --- a/src/horizontal/mo_lib_laplace.hpp +++ b/src/horizontal/mo_lib_laplace.hpp @@ -29,3 +29,14 @@ void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const i int nblks_c, int cell_type, int patch_id, int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); + + +template<typename T> +void nabla2_vec_atmos_lib(const T* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx, + const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk, + const int* vert_edge_idx, const int* vert_edge_blk, const T* tangent_orientation, + const T* inv_primal_edge_length, const T* inv_dual_edge_length, const T* geofac_div, + const T* geofac_rot, T* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c, + int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c, + int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc); diff --git a/src/horizontal/mo_lib_laplace_bindings.cpp b/src/horizontal/mo_lib_laplace_bindings.cpp index 73e5aa7..4e22793 100644 --- a/src/horizontal/mo_lib_laplace_bindings.cpp +++ b/src/horizontal/mo_lib_laplace_bindings.cpp @@ -36,7 +36,7 @@ void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, cons const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length, const int* cell_edge_idx, const int* cell_edge_blk, const float* geofac_n2s, const float* geofac_div, const float* nabla2_psi_c, - int i_startblk[3], int i_endblk[3], int i_startidx_in[3], int i_endidx_in[3], + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){ @@ -44,7 +44,77 @@ void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, cons edge_cell_idx, edge_cell_blk, inv_dual_edge_length, cell_edge_idx, cell_edge_blk, geofac_n2s, geofac_div, nabla2_psi_c, - i_startblk[3], i_endblk[3], i_startidx_in[3], i_endidx_in[3], + i_startblk_e, i_endblk_e, i_startidx_e, i_endidx_e, i_startblk_e, i_endblk_e, i_startidx_e, i_endidx_e, nlev, slev, elev, nproma, nblks_e, nblks_c, cell_type, lacc); } + +void nabla2_scalar_avg_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const double* geofac_n2s, const double* avg_coeff, double* nabla2_psi_c, + int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3], + int nblks_c, int cell_type, int patch_id, + int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc){ + + + nabla2_scalar_avg_lib<double>(psi_c, cell_neighbor_idx,cell_neighbor_blk, + geofac_n2s, avg_coeff, nabla2_psi_c, + i_startblk_in, i_endblk_in, i_startidx_in, i_endidx_in, + nblks_c, cell_type, patch_id, + nlev, slev, elev, nproma, l_limited_area, lacc); + +} + +void nabla2_scalar_avg_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const float* geofac_n2s, const float* avg_coeff, float* nabla2_psi_c, + int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3], + int nblks_c, int cell_type, int patch_id, + int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc){ + + nabla2_scalar_avg_lib<float>(psi_c, cell_neighbor_idx, cell_neighbor_blk, + geofac_n2s, avg_coeff, nabla2_psi_c, + i_startblk_in, i_endblk_in, i_startidx_in, i_endidx_in, + nblks_c, cell_type, patch_id, + nlev, slev, elev, nproma, l_limited_area, lacc); +} + + +void nabla2_vec_atmos_lib_dp(const double* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx, + const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk, + const int* vert_edge_idx, const int* vert_edge_blk, const double* tangent_orientation, + const double* inv_primal_edge_length, const double* inv_dual_edge_length, const double* geofac_div, + const double* geofac_rot, double* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c, + int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c, + int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc){ + + nabla2_vec_atmos_lib<double>(vec_e, edge_cell_idx, edge_cell_blk, edge_vertex_idx, + edge_vertex_blk, cell_edge_idx, cell_edge_blk, + vert_edge_idx, vert_edge_blk, tangent_orientation, + inv_primal_edge_length, inv_dual_edge_length, geofac_div, + geofac_rot,nabla2_vec_e, i_startblk_c, i_endblk_c, i_startidx_c, + i_endidx_c, i_startblk_v, i_endblk_v, i_startidx_v, i_endidx_v, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, nlev, nblks_c, + nblks_v, nblks_e, slev, elev, nproma, cell_type, lacc); +} + + +void nabla2_vec_atmos_lib_sp(const float* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx, + const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk, + const int* vert_edge_idx, const int* vert_edge_blk, const float* tangent_orientation, + const float* inv_primal_edge_length, const float* inv_dual_edge_length, const float* geofac_div, + const float* geofac_rot, float* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c, + int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c, + int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc){ + + nabla2_vec_atmos_lib<float>(vec_e, edge_cell_idx, edge_cell_blk, edge_vertex_idx, + edge_vertex_blk, cell_edge_idx, cell_edge_blk, + vert_edge_idx, vert_edge_blk, tangent_orientation, + inv_primal_edge_length, inv_dual_edge_length, geofac_div, + geofac_rot, nabla2_vec_e, i_startblk_c, i_endblk_c, i_startidx_c, + i_endidx_c, i_startblk_v, i_endblk_v, i_startidx_v, i_endidx_v, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, nlev, nblks_c, + nblks_v, nblks_e, slev, elev, nproma, cell_type, lacc); + + +} diff --git a/src/horizontal/mo_lib_laplace_bindings.h b/src/horizontal/mo_lib_laplace_bindings.h index c0fe237..7b22527 100644 --- a/src/horizontal/mo_lib_laplace_bindings.h +++ b/src/horizontal/mo_lib_laplace_bindings.h @@ -18,7 +18,7 @@ void nabla2_scalar_lib_dp(const double* psi_c, const int* cell_neighbor_idx, con const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length, const int* cell_edge_idx, const int* cell_edge_blk, const double* geofac_n2s, const double* geofac_div, const double* nabla2_psi_c, - int i_startblk[3], int i_endblk[3], int i_startidx_in[3], int i_endidx_in[3], + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); @@ -26,7 +26,39 @@ void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, cons const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length, const int* cell_edge_idx, const int* cell_edge_blk, const float* geofac_n2s, const float* geofac_div, const float* nabla2_psi_c, - int i_startblk[3], int i_endblk[3], int i_startidx_in[3], int i_endidx_in[3], + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); + +void nabla2_scalar_avg_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const double* geofac_n2s, const double* avg_coeff, double* nabla2_psi_c, + int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3], + int nblks_c, int cell_type, int patch_id, + int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); + +void nabla2_scalar_avg_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const float* geofac_n2s, const float* avg_coeff, float* nabla2_psi_c, + int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3], + int nblks_c, int cell_type, int patch_id, + int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); + +void nabla2_vec_atmos_lib_dp(const double* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx, + const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk, + const int* vert_edge_idx, const int* vert_edge_blk, const double* tangent_orientation, + const double* inv_primal_edge_length, const double* inv_dual_edge_length, const double* geofac_div, + const double* geofac_rot, double* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c, + int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c, + int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc); + + +void nabla2_vec_atmos_lib_sp(const float* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx, + const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk, + const int* vert_edge_idx, const int* vert_edge_blk, const float* tangent_orientation, + const float* inv_primal_edge_length, const float* inv_dual_edge_length, const float* geofac_div, + const float* geofac_rot, float* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c, + int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c, + int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc); + } -- GitLab From 8ae37ff24e8c5c5c7da38a566a372f665c97eba8 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante4.lvt.dkrz.de> Date: Thu, 27 Feb 2025 08:29:57 +0100 Subject: [PATCH 43/50] Fixed typo --- src/horizontal/mo_lib_laplace.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp index 361d752..32c6e00 100644 --- a/src/horizontal/mo_lib_laplace.cpp +++ b/src/horizontal/mo_lib_laplace.cpp @@ -11,8 +11,8 @@ #include "mo_lib_laplace.hpp" //#include "mo_lib_gradients.hpp" -//#inlcude "mo_fortran_tools.hpp" -//#inlcude "mo_lib_divrot" +//#include "mo_fortran_tools.hpp" +//#include "mo_lib_divrot" #include <Kokkos_Core.hpp> #include <iostream> #include <utility> -- GitLab From 37875ce9f31a48cb2de40313009192f4d102f732 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante4.lvt.dkrz.de> Date: Thu, 27 Feb 2025 08:50:46 +0100 Subject: [PATCH 44/50] Fix --- src/horizontal/mo_lib_laplace.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp index 32c6e00..8dac99a 100644 --- a/src/horizontal/mo_lib_laplace.cpp +++ b/src/horizontal/mo_lib_laplace.cpp @@ -253,7 +253,11 @@ void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const i }); } }//if -}//switch + break; + + default: + std::cerr << "Unknown value for cell_type: " << cell_type << '\n'; + }//switch }//void -- GitLab From a671c52944ef93369c5b48f496f2637272ba2945 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante2.lvt.dkrz.de> Date: Tue, 8 Apr 2025 16:25:17 +0200 Subject: [PATCH 45/50] Added the test for laplace --- test/c/test_mo_lib_laplace.cpp | 171 +++++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 test/c/test_mo_lib_laplace.cpp diff --git a/test/c/test_mo_lib_laplace.cpp b/test/c/test_mo_lib_laplace.cpp new file mode 100644 index 0000000..741f430 --- /dev/null +++ b/test/c/test_mo_lib_laplace.cpp @@ -0,0 +1,171 @@ + // ICON + // + // --------------------------------------------------------------- + // Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss + // Contact information: icon-model.org + // + // See AUTHORS.TXT for a list of authors + // See LICENSES/ for license information + // SPDX-License-Identifier: BSD-3-Clause + // --------------------------------------------------------------- + + #include <gtest/gtest.h> + #include <Kokkos_Core.hpp> + #include <vector> + #include "mo_lib_laplace.cpp" + + // Free-function helpers for 3D and 4D array sizes (assumed column-major) + template<typename T> + size_t num_elements_3d(int d1, int d2, int d3) { + return static_cast<size_t>(d1) * d2 * d3; + } + + template<typename T> + size_t num_elements_4d(int d1, int d2, int d3, int d4) { + return static_cast<size_t>(d1) * d2 * d3 * d4; + } + + // Define a helper struct that holds the two types. + template<typename InT, typename OutT> + struct MixedPrecision { + using in_type = InT; + using out_type = OutT; + }; + + + // Define the list of type pairs we want to test. + typedef ::testing::Types< MixedPrecision<double, double>, + MixedPrecision<double, float>, + MixedPrecision<float, float> > MixedTypes; + + +class Horizonral_dimension +{ + +public: + + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 4; // number of vertical levels + static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) + static constexpr int nblks_v = 2; // number of vertex blocks (for rbf arrays and outputs) + static constexpr int rbf_vec_dim = 6; // fixed dimension for rbf vector (stencil points) + static constexpr int nblks_c = 2; + + // Parameter values. + int i_startblk = 0; + int i_endblk = 1; // Test blocks [0, 1] + int i_startidx_in = 0; + int i_endidx_in = nproma - 1; // Full range: 0 .. nproma-1 + int i_startblk_e = 0; + int i_endblk_e = 0; + int slev = 0; + int i_startidx_e = 0; + int i_endidx_e = nproma - 1; + int nlev = 0; + int slev = 0; + int elev = nlev - 1; // Full vertical range (0 .. nlev-1) + int cell_type = 0; + bool lacc = false; // Not using ACC-specific behavior. +}; + + + + + // Define a typed test fixture. + template <typename TypePair> + class MoLibLaplaceMixedTestFixture : public ::testing::Test { + public: + + //Arrays for nabla2_scalar_lib + std::vector<T> psi_c; //Dimension (nproma,nlev,nblks_c) + std::vector<int> cell_neighbor_idx; //Dimenaion (nproma,nblks_c,3) + std::vector<int> cell_neighbor_blk; //Dimension (nproma,nblks_c,3) + std::vector<int> edge_cell_idx; //Dimension (nproma,nblks_e,2) + std::vector<int> edge_cell_blk; //Dimension (nproma,nblks_e,2) + std::vector<T> inv_dual_edge_length; //Dimension (nproma,nblks_e) + std::vector<int> cell_edge_idx; //Dimension (nproma,nblks_c,3) + std::vector<int> cell_edge_blk; //Dimension (nproma,nblks_c,3) + std::vector<T> geofac_n2s; // Dimension (nproma,cell_type+1,nblks_c) + std::vector<T> geofac_div; //Dimension (nproma,cell_type,nblks_c) + std::vector<T> nabla2_psi_c; //Dimension (nproma,nlev,nblks_c) + + MoLibLaplaceMixedTestFixture() { + + + //Arrays for nabla2_scalar_lib + psi_c.resize(num_elements_3d<T>(nproma,nlev,nblks_c), static_cast<T>(1)); + inv_dual_edge_length.resize(num_elements_2d<T>(nproma,nblks_e), static_cast<T>(1)); + geofac_n2s.resize(num_elements_2d<T> (nproma,cell_type+1,nblks_c), static_cast<T>(1)); + geofac_div.resize(num_elements_2d<T>(nproma,cell_type,nblks_c), static_cast<T>(1)); + nabla2_psi_c.resize(num_elements_2d<T>(nproma,nlev,nblks_c), static_cast<T>(1)); + cell_neighbor_idx.resize(num_elements_3d<int>(nproma,nblks_c,3)); + cell_neighbor_blk.resize(num_elements_3d<int>(nproma,nblks_e,3)); + edge_cell_idx.resize(num_elements_3d<int> (nproma,nblks_e,2)); + edge_cell_blk.resize(num_elements_3d<int>(nproma,nlev,nblks_c)); + cell_edge_idx.resize(num_elements_2d<int>(nproma,nblks_c,3)); + cell_edge_blk.resize(num_elements_2d<int>(nproma,nblks_c,3)); + geofac_n2s.resize(num_elements_2d<T>(nproma,cell_type+1,nblks_c), static_cast<T>(1)); + geofac_div.resize(num_elements_2d<T>(nproma,cell_type,nblks_c), static_cast<T>(1)); + nabla2_psi_c.resize(num_elements_2d<T>(nproma,nlev,nblks_c), static_cast<T>(0)); + + // Allocate and initialize inputs. + psi_c.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1)); + + cell_neighbor_idx.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 1); + cell_neighbor_blk.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 0); + geofac_n2s.resize(num_elements_3d<InType>(rbf_vec_dim, 2, nproma, nblks_v), static_cast<InType>(1)); + + // Allocate output arrays and initialize to zero. + p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); + p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); + } + }; + +typedef ::testing::Types<float, double> SingleType; + +TYPED_TEST_SUITE(MoLibLaplaceMixedTestFixture, nabla2_scalar_lib); +TYPED_TEST(MoLibLaplaceMixedTestFixture, nabla2_scalar_lib) { + using InType = typename TestFixture::InType; + using OutType = typename TestFixture::OutType; + + + nabla2_scalar_lib<InType, OutType>( + this->psi_c.data(), + this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), + this->edge_cell_idx.data(), + this->edge_cell_blk.data(), + this->inv_dual_edge_length.data(), + this->cell_edge_idx.data(), + this->cell_edge_blk.data(), + this->geofac_n2s.data(), + this->geofac_div.data(), + this->nabla2_psi_c.data(), + this->i_startblk, + this->i_endblk, + this->i_startidx_in, + this->i_endidx_in, + this->i_startblk_e, + this->i_endblk_e, + this->i_startidx_e, + this->i_endidx_e, + this->nlev, + this->slev, + this->elev, + this->nproma, + this->nblks_e, + this->nblks_c, + this->cell_type, + this->lacc) + + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev ; level < this->elev; ++level) { + for (int i = this->i_startidx ; i < this->i_endidx; ++i) { + + size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; + EXPECT_NEAR(this->nabla2_psi_c[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + } + } + } +} -- GitLab From 2a5aa431f7ea2f1cbd4ee0edb19c4262e469c857 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante2.lvt.dkrz.de> Date: Wed, 9 Apr 2025 16:24:28 +0200 Subject: [PATCH 46/50] WIP:test for laplace --- src/horizontal/mo_lib_laplace.cpp | 23 +++-- src/horizontal/mo_lib_laplace.hpp | 4 +- test/c/CMakeLists.txt | 2 + test/c/test_mo_lib_laplace.cpp | 135 +++++++++++++----------------- 4 files changed, 78 insertions(+), 86 deletions(-) diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp index 8dac99a..3c52f20 100644 --- a/src/horizontal/mo_lib_laplace.cpp +++ b/src/horizontal/mo_lib_laplace.cpp @@ -18,18 +18,18 @@ #include <utility> #include <memory> -template<typename T> +template<typename T, typename S> void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, const int* edge_cell_idx, const int* edge_cell_blk, const T* inv_dual_edge_length, const int* cell_edge_idx, const int* cell_edge_blk, - const T* geofac_n2s, const T* geofac_div, T* nabla2_psi_c, + const T* geofac_n2s, const T* geofac_div, S* nabla2_psi_c, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){ typedef Kokkos::View<const T**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT2D; typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; @@ -90,7 +90,7 @@ void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* }//void template -void nabla2_scalar_lib<double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, +void nabla2_scalar_lib<double, double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length, const int* cell_edge_idx, const int* cell_edge_blk, const double* geofac_n2s, const double* geofac_div, double* nabla2_psi_c, @@ -98,8 +98,20 @@ void nabla2_scalar_lib<double>(const double* psi_c, const int* cell_neighbor_idx int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); + template -void nabla2_scalar_lib<float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, +void nabla2_scalar_lib<double, float>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length, + const int* cell_edge_idx, const int* cell_edge_blk, + const double* geofac_n2s, const double* geofac_div, float* nabla2_psi_c, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, + int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); + + + +template +void nabla2_scalar_lib<float, float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length, const int* cell_edge_idx, const int* cell_edge_blk, const float* geofac_n2s, const float* geofac_div, float* nabla2_psi_c, @@ -107,7 +119,6 @@ void nabla2_scalar_lib<float>(const float* psi_c, const int* cell_neighbor_idx, int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); - //--------------------------------nabla2_scalar_avg_lib------------------------------------- diff --git a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp index 5e89713..e132247 100644 --- a/src/horizontal/mo_lib_laplace.hpp +++ b/src/horizontal/mo_lib_laplace.hpp @@ -14,11 +14,11 @@ #include <Kokkos_Core.hpp> #include <vector> -template<typename T> +template<typename T, typename S> void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, const int* dge_cell_idx, const int* edge_cell_blk, const T* inv_dual_edge_length, const int* cell_edge_idx, const int* cell_edge_blk, - const T* geofac_n2s, const T* geofac_div, const T* nabla2_psi_c, + const T* geofac_n2s, const T* geofac_div, S* nabla2_psi_c, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index 13c5dfe..2180e1a 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -26,6 +26,7 @@ set(SOURCES test_tdma_solver.cpp test_interpolation_vector.cpp test_intp_rbf.cpp + test_mo_lib_laplace.cpp ) # Create the test executable from your test files, including main.cpp. add_executable(iconmath_test_c ${SOURCES}) @@ -35,6 +36,7 @@ target_link_libraries(iconmath_test_c PUBLIC iconmath-support iconmath-interpolation + iconmath-horizontal PRIVATE gtest_main Kokkos::kokkos diff --git a/test/c/test_mo_lib_laplace.cpp b/test/c/test_mo_lib_laplace.cpp index 741f430..f5e1995 100644 --- a/test/c/test_mo_lib_laplace.cpp +++ b/test/c/test_mo_lib_laplace.cpp @@ -12,19 +12,22 @@ #include <gtest/gtest.h> #include <Kokkos_Core.hpp> #include <vector> - #include "mo_lib_laplace.cpp" + #include "mo_lib_laplace.hpp" - // Free-function helpers for 3D and 4D array sizes (assumed column-major) - template<typename T> +template<typename T> size_t num_elements_3d(int d1, int d2, int d3) { return static_cast<size_t>(d1) * d2 * d3; } - + +template<typename T> + size_t num_elements_2d(int d1, int d2) { + return static_cast<size_t>(d1) * d2; + } + template<typename T> size_t num_elements_4d(int d1, int d2, int d3, int d4) { return static_cast<size_t>(d1) * d2 * d3 * d4; - } - +} // Define a helper struct that holds the two types. template<typename InT, typename OutT> struct MixedPrecision { @@ -37,97 +40,73 @@ typedef ::testing::Types< MixedPrecision<double, double>, MixedPrecision<double, float>, MixedPrecision<float, float> > MixedTypes; - - -class Horizonral_dimension -{ - -public: - - static constexpr int nproma = 3; // inner loop length - static constexpr int nlev = 4; // number of vertical levels - static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) - static constexpr int nblks_v = 2; // number of vertex blocks (for rbf arrays and outputs) - static constexpr int rbf_vec_dim = 6; // fixed dimension for rbf vector (stencil points) - static constexpr int nblks_c = 2; - - // Parameter values. - int i_startblk = 0; - int i_endblk = 1; // Test blocks [0, 1] - int i_startidx_in = 0; - int i_endidx_in = nproma - 1; // Full range: 0 .. nproma-1 - int i_startblk_e = 0; - int i_endblk_e = 0; - int slev = 0; - int i_startidx_e = 0; - int i_endidx_e = nproma - 1; - int nlev = 0; - int slev = 0; - int elev = nlev - 1; // Full vertical range (0 .. nlev-1) - int cell_type = 0; - bool lacc = false; // Not using ACC-specific behavior. -}; - - // Define a typed test fixture. template <typename TypePair> class MoLibLaplaceMixedTestFixture : public ::testing::Test { - public: + public: + using InType = typename TypePair::in_type; + using OutType = typename TypePair::out_type; + + static constexpr int nproma = 3; // inner loop length + static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) + static constexpr int nblks_v = 2; // number of vertex blocks (for rbf arrays and outputs) + static constexpr int rbf_vec_dim = 6; // fixed dimension for rbf vector (stencil points) + static constexpr int nblks_c = 2; + + int i_startblk = 0; + int i_endblk = 1; + int i_startidx_in = 0; + int i_endidx_in = nproma-1 ; + int i_startblk_e = 0 ; + int i_endblk_e = 0 ; + int i_startidx_e = 0; + int i_endidx_e = 0 ; + int nlev = 4; + int slev = 0 ; + int elev = nlev - 1; + int cell_type = 0 ; + const bool lacc = false ; + + //Arrays for nabla2_scalar_lib - std::vector<T> psi_c; //Dimension (nproma,nlev,nblks_c) + std::vector<InType> psi_c; //Dimension (nproma,nlev,nblks_c) std::vector<int> cell_neighbor_idx; //Dimenaion (nproma,nblks_c,3) std::vector<int> cell_neighbor_blk; //Dimension (nproma,nblks_c,3) std::vector<int> edge_cell_idx; //Dimension (nproma,nblks_e,2) std::vector<int> edge_cell_blk; //Dimension (nproma,nblks_e,2) - std::vector<T> inv_dual_edge_length; //Dimension (nproma,nblks_e) + std::vector<InType> inv_dual_edge_length; //Dimension (nproma,nblks_e) std::vector<int> cell_edge_idx; //Dimension (nproma,nblks_c,3) std::vector<int> cell_edge_blk; //Dimension (nproma,nblks_c,3) - std::vector<T> geofac_n2s; // Dimension (nproma,cell_type+1,nblks_c) - std::vector<T> geofac_div; //Dimension (nproma,cell_type,nblks_c) - std::vector<T> nabla2_psi_c; //Dimension (nproma,nlev,nblks_c) + std::vector<InType> geofac_n2s; // Dimension (nproma,cell_type+1,nblks_c) + std::vector<InType> geofac_div; //Dimension (nproma,cell_type,nblks_c) + std::vector<OutType> nabla2_psi_c; //Dimension (nproma,nlev,nblks_c) MoLibLaplaceMixedTestFixture() { - //Arrays for nabla2_scalar_lib - psi_c.resize(num_elements_3d<T>(nproma,nlev,nblks_c), static_cast<T>(1)); - inv_dual_edge_length.resize(num_elements_2d<T>(nproma,nblks_e), static_cast<T>(1)); - geofac_n2s.resize(num_elements_2d<T> (nproma,cell_type+1,nblks_c), static_cast<T>(1)); - geofac_div.resize(num_elements_2d<T>(nproma,cell_type,nblks_c), static_cast<T>(1)); - nabla2_psi_c.resize(num_elements_2d<T>(nproma,nlev,nblks_c), static_cast<T>(1)); - cell_neighbor_idx.resize(num_elements_3d<int>(nproma,nblks_c,3)); - cell_neighbor_blk.resize(num_elements_3d<int>(nproma,nblks_e,3)); - edge_cell_idx.resize(num_elements_3d<int> (nproma,nblks_e,2)); - edge_cell_blk.resize(num_elements_3d<int>(nproma,nlev,nblks_c)); - cell_edge_idx.resize(num_elements_2d<int>(nproma,nblks_c,3)); - cell_edge_blk.resize(num_elements_2d<int>(nproma,nblks_c,3)); - geofac_n2s.resize(num_elements_2d<T>(nproma,cell_type+1,nblks_c), static_cast<T>(1)); - geofac_div.resize(num_elements_2d<T>(nproma,cell_type,nblks_c), static_cast<T>(1)); - nabla2_psi_c.resize(num_elements_2d<T>(nproma,nlev,nblks_c), static_cast<T>(0)); - - // Allocate and initialize inputs. - psi_c.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1)); - - cell_neighbor_idx.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 1); - cell_neighbor_blk.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 0); - geofac_n2s.resize(num_elements_3d<InType>(rbf_vec_dim, 2, nproma, nblks_v), static_cast<InType>(1)); - - // Allocate output arrays and initialize to zero. - p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); - p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); + psi_c.resize(num_elements_3d<InType>(nproma,nlev,nblks_c), static_cast<InType>(1)); + inv_dual_edge_length.resize(num_elements_2d<InType>(nproma,nblks_e), static_cast<InType>(1)); + geofac_n2s.resize(num_elements_3d<InType>(nproma,cell_type+1,nblks_c), static_cast<InType>(1)); + geofac_div.resize(num_elements_3d<InType>(nproma,cell_type,nblks_c), static_cast<InType>(1)); + nabla2_psi_c.resize(num_elements_3d<OutType>(nproma,nlev,nblks_c), static_cast<OutType>(0)); + cell_neighbor_idx.resize(num_elements_3d<int>(nproma,nblks_c,3), 1); + cell_neighbor_blk.resize(num_elements_3d<int>(nproma,nblks_c,3), 0); + edge_cell_idx.resize(num_elements_3d<int>(nproma,nblks_e,2), 1); + edge_cell_blk.resize(num_elements_3d<int>(nproma,nblks_e,2), 0); + cell_edge_idx.resize(num_elements_3d<int>(nproma,nblks_c,3), 1); + cell_edge_blk.resize(num_elements_3d<int>(nproma,nblks_c,3), 0); } }; -typedef ::testing::Types<float, double> SingleType; -TYPED_TEST_SUITE(MoLibLaplaceMixedTestFixture, nabla2_scalar_lib); -TYPED_TEST(MoLibLaplaceMixedTestFixture, nabla2_scalar_lib) { +TYPED_TEST_SUITE(MoLibLaplaceMixedTestFixture, MixedTypes); + +TYPED_TEST(MoLibLaplaceMixedTestFixture, BasicTest) { using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; - nabla2_scalar_lib<InType, OutType>( this->psi_c.data(), @@ -145,7 +124,7 @@ TYPED_TEST(MoLibLaplaceMixedTestFixture, nabla2_scalar_lib) { this->i_endblk, this->i_startidx_in, this->i_endidx_in, - this->i_startblk_e, + this->i_startblk_e, this->i_endblk_e, this->i_startidx_e, this->i_endidx_e, @@ -154,13 +133,13 @@ TYPED_TEST(MoLibLaplaceMixedTestFixture, nabla2_scalar_lib) { this->elev, this->nproma, this->nblks_e, - this->nblks_c, + this->nblks_c, this->cell_type, - this->lacc) + this->lacc); for (int block = this->i_startblk; block <= this->i_endblk; ++block) { for (int level = this->slev ; level < this->elev; ++level) { - for (int i = this->i_startidx ; i < this->i_endidx; ++i) { + for (int i = this->i_startidx_in ; i < this->i_endidx_in; ++i) { size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; EXPECT_NEAR(this->nabla2_psi_c[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) -- GitLab From 38db433dba4597cbac291ffca01e2bfdfb49687b Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante3.lvt.dkrz.de> Date: Wed, 9 Apr 2025 22:28:51 +0200 Subject: [PATCH 47/50] Fixed linking error --- src/horizontal/mo_lib_laplace_bindings.cpp | 27 ++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/horizontal/mo_lib_laplace_bindings.cpp b/src/horizontal/mo_lib_laplace_bindings.cpp index 4e22793..984c50a 100644 --- a/src/horizontal/mo_lib_laplace_bindings.cpp +++ b/src/horizontal/mo_lib_laplace_bindings.cpp @@ -17,12 +17,12 @@ void nabla2_scalar_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length, const int* cell_edge_idx, const int* cell_edge_blk, - const double* geofac_n2s, const double* geofac_div, const double* nabla2_psi_c, + const double* geofac_n2s, const double* geofac_div, double* nabla2_psi_c, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){ - nabla2_scalar_lib<double>(psi_c, cell_neighbor_idx, cell_neighbor_blk, + nabla2_scalar_lib<double, double>(psi_c, cell_neighbor_idx, cell_neighbor_blk, edge_cell_idx, edge_cell_blk, inv_dual_edge_length, cell_edge_idx, cell_edge_blk, geofac_n2s, geofac_div, nabla2_psi_c, @@ -35,12 +35,12 @@ void nabla2_scalar_lib_dp(const double* psi_c, const int* cell_neighbor_idx, con void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length, const int* cell_edge_idx, const int* cell_edge_blk, - const float* geofac_n2s, const float* geofac_div, const float* nabla2_psi_c, + const float* geofac_n2s, const float* geofac_div, float* nabla2_psi_c, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){ - nabla2_scalar_lib<float>(psi_c, cell_neighbor_idx, cell_neighbor_blk, + nabla2_scalar_lib<float,float>(psi_c, cell_neighbor_idx, cell_neighbor_blk, edge_cell_idx, edge_cell_blk, inv_dual_edge_length, cell_edge_idx, cell_edge_blk, geofac_n2s, geofac_div, nabla2_psi_c, @@ -49,6 +49,25 @@ void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, cons nlev, slev, elev, nproma, nblks_e, nblks_c, cell_type, lacc); } + + +void nabla2_scalar_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length, + const int* cell_edge_idx, const int* cell_edge_blk, + const double* geofac_n2s, const double* geofac_div, float* nabla2_psi_c, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, + int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){ + + nabla2_scalar_lib<double, float>(psi_c, cell_neighbor_idx, cell_neighbor_blk, + edge_cell_idx, edge_cell_blk, inv_dual_edge_length, + cell_edge_idx, cell_edge_blk, + geofac_n2s, geofac_div, nabla2_psi_c, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, + i_startblk_e, i_endblk_e, i_startidx_e, i_endidx_e, + nlev, slev, elev, nproma, nblks_e, nblks_c, cell_type, lacc); +} + void nabla2_scalar_avg_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, const double* geofac_n2s, const double* avg_coeff, double* nabla2_psi_c, int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3], -- GitLab From ecc93abc47a7c4225393562a4aba1b41b4f95068 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante3.lvt.dkrz.de> Date: Wed, 9 Apr 2025 22:32:09 +0200 Subject: [PATCH 48/50] Fixed typo --- src/horizontal/mo_lib_divrot.F90 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/horizontal/mo_lib_divrot.F90 b/src/horizontal/mo_lib_divrot.F90 index a2f2ad9..c1360f6 100644 --- a/src/horizontal/mo_lib_divrot.F90 +++ b/src/horizontal/mo_lib_divrot.F90 @@ -561,7 +561,7 @@ CONTAINS !! !! !LITERATURE !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh -!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys., +!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys., !! 181, 729-752 !! SUBROUTINE recon_lsq_cell_q_lib(p_cc, lsq_idx_c, lsq_blk_c, & @@ -800,7 +800,7 @@ CONTAINS !! !! !LITERATURE !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh -!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys., +!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys., !! 181, 729-752 !! SUBROUTINE recon_lsq_cell_q_svd_lib(p_cc, lsq_idx_c, lsq_blk_c, & @@ -1008,7 +1008,7 @@ CONTAINS !! !! !LITERATURE !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh -!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys., +!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys., !! 181, 729-752 !! SUBROUTINE recon_lsq_cell_c_lib(p_cc, lsq_idx_c, lsq_blk_c, & @@ -1295,7 +1295,7 @@ CONTAINS !! !! !LITERATURE !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh -!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys., +!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys., !! 181, 729-752 !! SUBROUTINE recon_lsq_cell_c_svd_lib(p_cc, lsq_idx_c, lsq_blk_c, & -- GitLab From 59e788ffbca278a46d0c1161b1aa608de556dbe5 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante3.lvt.dkrz.de> Date: Thu, 10 Apr 2025 11:02:38 +0200 Subject: [PATCH 49/50] Basic structure test for nabla2_scalar_lib nabla2_scalar_avg_lib --- src/horizontal/mo_lib_laplace.cpp | 19 +++-- src/horizontal/mo_lib_laplace.hpp | 6 +- src/horizontal/mo_lib_laplace_bindings.cpp | 22 ++++- src/horizontal/mo_lib_laplace_bindings.h | 4 +- test/c/test_mo_lib_laplace.cpp | 97 ++++++++++++++++------ 5 files changed, 109 insertions(+), 39 deletions(-) diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp index 3c52f20..1aca254 100644 --- a/src/horizontal/mo_lib_laplace.cpp +++ b/src/horizontal/mo_lib_laplace.cpp @@ -122,15 +122,15 @@ void nabla2_scalar_lib<float, float>(const float* psi_c, const int* cell_neighbo //--------------------------------nabla2_scalar_avg_lib------------------------------------- -template<typename T> +template<typename T, typename S> void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const T* geofac_n2s, const T* avg_coeff, T* nabla2_psi_c, + const T* geofac_n2s, const T* avg_coeff, S* nabla2_psi_c, int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3], int nblks_c, int cell_type, int patch_id, int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc){ typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; UnmanagedConstT3D psi_c_view(psi_c, nproma,nlev,nblks_c); @@ -273,19 +273,26 @@ void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const i template -void nabla2_scalar_avg_lib<double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, +void nabla2_scalar_avg_lib<double, double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, const double* geofac_n2s, const double* avg_coeff, double* nabla2_psi_c, int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3], int nblks_c, int cell_type, int patch_id, int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); template -void nabla2_scalar_avg_lib<float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const float* geofac_n2s, const float* avg_coeff, float* nabla2_psi_c, +void nabla2_scalar_avg_lib<double, float>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const double* geofac_n2s, const double* avg_coeff, float* nabla2_psi_c, int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3], int nblks_c, int cell_type, int patch_id, int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); +template +void nabla2_scalar_avg_lib<float, float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const float* geofac_n2s, const float* avg_coeff, float* nabla2_psi_c, + int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3], + int nblks_c, int cell_type, int patch_id, + int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); + //-----------------------------------------nabla2_vec_atmos_lib--------------------------------------- diff --git a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp index e132247..0fde697 100644 --- a/src/horizontal/mo_lib_laplace.hpp +++ b/src/horizontal/mo_lib_laplace.hpp @@ -22,9 +22,11 @@ void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); -template<typename T> + + +template<typename T, typename S> void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const T* geofac_n2s, const T* avg_coeff, T* nabla2_psi_c, + const T* geofac_n2s, const T* avg_coeff, S* nabla2_psi_c, int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3], int nblks_c, int cell_type, int patch_id, int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); diff --git a/src/horizontal/mo_lib_laplace_bindings.cpp b/src/horizontal/mo_lib_laplace_bindings.cpp index 984c50a..a4e556b 100644 --- a/src/horizontal/mo_lib_laplace_bindings.cpp +++ b/src/horizontal/mo_lib_laplace_bindings.cpp @@ -75,7 +75,7 @@ void nabla2_scalar_avg_lib_dp(const double* psi_c, const int* cell_neighbor_idx, int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc){ - nabla2_scalar_avg_lib<double>(psi_c, cell_neighbor_idx,cell_neighbor_blk, + nabla2_scalar_avg_lib<double, double>(psi_c, cell_neighbor_idx,cell_neighbor_blk, geofac_n2s, avg_coeff, nabla2_psi_c, i_startblk_in, i_endblk_in, i_startidx_in, i_endidx_in, nblks_c, cell_type, patch_id, @@ -83,13 +83,27 @@ void nabla2_scalar_avg_lib_dp(const double* psi_c, const int* cell_neighbor_idx, } +void nabla2_scalar_avg_lib_sp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const double* geofac_n2s, const double* avg_coeff, float* nabla2_psi_c, + int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3], + int nblks_c, int cell_type, int patch_id, + int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc){ + + nabla2_scalar_avg_lib<double, float>(psi_c, cell_neighbor_idx, cell_neighbor_blk, + geofac_n2s, avg_coeff, nabla2_psi_c, + i_startblk_in, i_endblk_in, i_startidx_in, i_endidx_in, + nblks_c, cell_type, patch_id, + nlev, slev, elev, nproma, l_limited_area, lacc); +} + + void nabla2_scalar_avg_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, const float* geofac_n2s, const float* avg_coeff, float* nabla2_psi_c, int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3], int nblks_c, int cell_type, int patch_id, int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc){ - nabla2_scalar_avg_lib<float>(psi_c, cell_neighbor_idx, cell_neighbor_blk, + nabla2_scalar_avg_lib<float, float>(psi_c, cell_neighbor_idx, cell_neighbor_blk, geofac_n2s, avg_coeff, nabla2_psi_c, i_startblk_in, i_endblk_in, i_startidx_in, i_endidx_in, nblks_c, cell_type, patch_id, @@ -97,6 +111,10 @@ void nabla2_scalar_avg_lib_sp(const float* psi_c, const int* cell_neighbor_idx, } + + + + void nabla2_vec_atmos_lib_dp(const double* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx, const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk, const int* vert_edge_idx, const int* vert_edge_blk, const double* tangent_orientation, diff --git a/src/horizontal/mo_lib_laplace_bindings.h b/src/horizontal/mo_lib_laplace_bindings.h index 7b22527..49f3c4b 100644 --- a/src/horizontal/mo_lib_laplace_bindings.h +++ b/src/horizontal/mo_lib_laplace_bindings.h @@ -31,13 +31,13 @@ void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, cons int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); void nabla2_scalar_avg_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const double* geofac_n2s, const double* avg_coeff, double* nabla2_psi_c, + const double* geofac_n2s, const double* avg_coeff, const double* nabla2_psi_c, int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3], int nblks_c, int cell_type, int patch_id, int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); void nabla2_scalar_avg_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const float* geofac_n2s, const float* avg_coeff, float* nabla2_psi_c, + const float* geofac_n2s, const float* avg_coeff, const float* nabla2_psi_c, int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3], int nblks_c, int cell_type, int patch_id, int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); diff --git a/test/c/test_mo_lib_laplace.cpp b/test/c/test_mo_lib_laplace.cpp index f5e1995..0e25792 100644 --- a/test/c/test_mo_lib_laplace.cpp +++ b/test/c/test_mo_lib_laplace.cpp @@ -63,6 +63,16 @@ template<typename T> int i_endblk_e = 0 ; int i_startidx_e = 0; int i_endidx_e = 0 ; + + //nabla2_scalar_avg_lib_sp specifics + std::vector<int> i_startblk_in{}; + std::vector<int> i_endblk_in{}; + std::vector<int> i_startidx_array{}; + std::vector<int> i_endidx_array{}; + int patch_id = 0; + const bool l_limited_area = false; + // + int nlev = 4; int slev = 0 ; int elev = nlev - 1; @@ -84,6 +94,9 @@ template<typename T> std::vector<InType> geofac_div; //Dimension (nproma,cell_type,nblks_c) std::vector<OutType> nabla2_psi_c; //Dimension (nproma,nlev,nblks_c) + //Arrays for nabla2_scalar_avg_lib_sp + std::vector<InType> avg_coeff; + MoLibLaplaceMixedTestFixture() { //Arrays for nabla2_scalar_lib @@ -98,43 +111,33 @@ template<typename T> edge_cell_blk.resize(num_elements_3d<int>(nproma,nblks_e,2), 0); cell_edge_idx.resize(num_elements_3d<int>(nproma,nblks_c,3), 1); cell_edge_blk.resize(num_elements_3d<int>(nproma,nblks_c,3), 0); + + //Allocating nabla2_scalar_avg_lib_sp specifics + i_startblk_in.resize(3, i_startblk); + i_endblk_in.resize(3, i_endblk); + i_startidx_array.resize(3, i_startidx_in); + i_endidx_array.resize(3, i_endidx_in); } }; TYPED_TEST_SUITE(MoLibLaplaceMixedTestFixture, MixedTypes); -TYPED_TEST(MoLibLaplaceMixedTestFixture, BasicTest) { +TYPED_TEST(MoLibLaplaceMixedTestFixture, nabla2_scalar_lib) { using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; nabla2_scalar_lib<InType, OutType>( - this->psi_c.data(), - this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), - this->edge_cell_idx.data(), - this->edge_cell_blk.data(), - this->inv_dual_edge_length.data(), - this->cell_edge_idx.data(), - this->cell_edge_blk.data(), - this->geofac_n2s.data(), - this->geofac_div.data(), - this->nabla2_psi_c.data(), - this->i_startblk, - this->i_endblk, - this->i_startidx_in, - this->i_endidx_in, - this->i_startblk_e, - this->i_endblk_e, - this->i_startidx_e, - this->i_endidx_e, - this->nlev, - this->slev, - this->elev, - this->nproma, - this->nblks_e, - this->nblks_c, - this->cell_type, + this->psi_c.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->edge_cell_idx.data(), + this->edge_cell_blk.data(), this->inv_dual_edge_length.data(), + this->cell_edge_idx.data(),this->cell_edge_blk.data(), + this->geofac_n2s.data(), this->geofac_div.data(), + this->nabla2_psi_c.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->i_startblk_e, this->i_endblk_e, this->i_startidx_e, + this->i_endidx_e, this->nlev, this->slev, this->elev, + this->nproma, this->nblks_e, this->nblks_c, this->cell_type, this->lacc); for (int block = this->i_startblk; block <= this->i_endblk; ++block) { @@ -148,3 +151,43 @@ TYPED_TEST(MoLibLaplaceMixedTestFixture, BasicTest) { } } } + + +TYPED_TEST(MoLibLaplaceMixedTestFixture, nabla2_scalar_avg_lib) { +using InType = typename TestFixture::InType; +using OutType = typename TestFixture::OutType; + + +nabla2_scalar_avg_lib<InType, OutType>( + this->psi_c.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->geofac_n2s.data(), + this->avg_coeff.data(), this->nabla2_psi_c.data(), + this->i_startblk_in.data(), this->i_endblk_in.data(), + this->i_startidx_array.data(), this->i_endidx_array.data(), + this->nblks_c, this->cell_type, this->patch_id, + this->nlev, this->slev, this->elev, this->nproma, + this->l_limited_area, this->lacc); + + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev ; level < this->elev; ++level) { + for (int i = this->i_startidx_in ; i < this->i_endidx_in; ++i) { + + size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; + EXPECT_NEAR(this->nabla2_psi_c[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + } + } + } +} + + + + + + + + + + + + -- GitLab From 2de0e2bd68dab4324072ae5954302145788e6a78 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante3.lvt.dkrz.de> Date: Thu, 10 Apr 2025 13:32:23 +0200 Subject: [PATCH 50/50] Added test for rot_vertex_atmos_lib --- src/horizontal/mo_lib_laplace.cpp | 21 +++-- src/horizontal/mo_lib_laplace.hpp | 4 +- src/horizontal/mo_lib_laplace_bindings.cpp | 28 ++++++- src/horizontal/mo_lib_laplace_bindings.h | 4 +- test/c/test_mo_lib_laplace.cpp | 95 ++++++++++++++++++---- 5 files changed, 121 insertions(+), 31 deletions(-) diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp index 1aca254..c5c3ce3 100644 --- a/src/horizontal/mo_lib_laplace.cpp +++ b/src/horizontal/mo_lib_laplace.cpp @@ -296,12 +296,12 @@ void nabla2_scalar_avg_lib<float, float>(const float* psi_c, const int* cell_nei //-----------------------------------------nabla2_vec_atmos_lib--------------------------------------- -template<typename T> +template<typename T, typename S> void nabla2_vec_atmos_lib(const T* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx, const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk, const int* vert_edge_idx, const int* vert_edge_blk, const T* tangent_orientation, const T* inv_primal_edge_length, const T* inv_dual_edge_length, const T* geofac_div, - const T* geofac_rot, T* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c, + const T* geofac_rot, S* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c, int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c, int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc){ @@ -309,7 +309,7 @@ void nabla2_vec_atmos_lib(const T* vec_e, const int* edge_cell_idx, const int* e typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; typedef Kokkos::View<const T**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT2D; UnmanagedConstT3D vec_e_view(vec_e, nproma,nlev,nblks_e); @@ -370,7 +370,7 @@ void nabla2_vec_atmos_lib(const T* vec_e, const int* edge_cell_idx, const int* e template -void nabla2_vec_atmos_lib<double>(const double* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx, +void nabla2_vec_atmos_lib<double, double>(const double* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx, const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk, const int* vert_edge_idx, const int* vert_edge_blk, const double* tangent_orientation, const double* inv_primal_edge_length, const double* inv_dual_edge_length, const double* geofac_div, @@ -381,7 +381,17 @@ void nabla2_vec_atmos_lib<double>(const double* vec_e, const int* edge_cell_idx, template -void nabla2_vec_atmos_lib<float>(const float* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx, +void nabla2_vec_atmos_lib<double, float>(const double* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx, + const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk, + const int* vert_edge_idx, const int* vert_edge_blk, const double* tangent_orientation, + const double* inv_primal_edge_length, const double* inv_dual_edge_length, const double* geofac_div, + const double* geofac_rot, float* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c, + int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c, + int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc); + + template +void nabla2_vec_atmos_lib<float, float>(const float* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx, const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk, const int* vert_edge_idx, const int* vert_edge_blk, const float* tangent_orientation, const float* inv_primal_edge_length, const float* inv_dual_edge_length, const float* geofac_div, @@ -389,3 +399,4 @@ void nabla2_vec_atmos_lib<float>(const float* vec_e, const int* edge_cell_idx, c int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c, int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc); + diff --git a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp index 0fde697..ac0906d 100644 --- a/src/horizontal/mo_lib_laplace.hpp +++ b/src/horizontal/mo_lib_laplace.hpp @@ -33,12 +33,12 @@ void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const i -template<typename T> +template<typename T, typename S> void nabla2_vec_atmos_lib(const T* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx, const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk, const int* vert_edge_idx, const int* vert_edge_blk, const T* tangent_orientation, const T* inv_primal_edge_length, const T* inv_dual_edge_length, const T* geofac_div, - const T* geofac_rot, T* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c, + const T* geofac_rot, S* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c, int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c, int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc); diff --git a/src/horizontal/mo_lib_laplace_bindings.cpp b/src/horizontal/mo_lib_laplace_bindings.cpp index a4e556b..e4dfe7a 100644 --- a/src/horizontal/mo_lib_laplace_bindings.cpp +++ b/src/horizontal/mo_lib_laplace_bindings.cpp @@ -124,7 +124,7 @@ void nabla2_vec_atmos_lib_dp(const double* vec_e, const int* edge_cell_idx, cons int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c, int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc){ - nabla2_vec_atmos_lib<double>(vec_e, edge_cell_idx, edge_cell_blk, edge_vertex_idx, + nabla2_vec_atmos_lib<double, double>(vec_e, edge_cell_idx, edge_cell_blk, edge_vertex_idx, edge_vertex_blk, cell_edge_idx, cell_edge_blk, vert_edge_idx, vert_edge_blk, tangent_orientation, inv_primal_edge_length, inv_dual_edge_length, geofac_div, @@ -135,6 +135,27 @@ void nabla2_vec_atmos_lib_dp(const double* vec_e, const int* edge_cell_idx, cons } + + +void nabla2_vec_atmos_lib_dp(const double* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx, + const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk, + const int* vert_edge_idx, const int* vert_edge_blk, const double* tangent_orientation, + const double* inv_primal_edge_length, const double* inv_dual_edge_length, const double* geofac_div, + const double* geofac_rot, float* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c, + int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c, + int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc){ + + nabla2_vec_atmos_lib<double, float>(vec_e, edge_cell_idx, edge_cell_blk, edge_vertex_idx, + edge_vertex_blk, cell_edge_idx, cell_edge_blk, + vert_edge_idx, vert_edge_blk, tangent_orientation, + inv_primal_edge_length, inv_dual_edge_length, geofac_div, + geofac_rot,nabla2_vec_e, i_startblk_c, i_endblk_c, i_startidx_c, + i_endidx_c, i_startblk_v, i_endblk_v, i_startidx_v, i_endidx_v, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, nlev, nblks_c, + nblks_v, nblks_e, slev, elev, nproma, cell_type, lacc); +} + void nabla2_vec_atmos_lib_sp(const float* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx, const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk, const int* vert_edge_idx, const int* vert_edge_blk, const float* tangent_orientation, @@ -144,7 +165,7 @@ void nabla2_vec_atmos_lib_sp(const float* vec_e, const int* edge_cell_idx, const int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c, int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc){ - nabla2_vec_atmos_lib<float>(vec_e, edge_cell_idx, edge_cell_blk, edge_vertex_idx, + nabla2_vec_atmos_lib<float,float>(vec_e, edge_cell_idx, edge_cell_blk, edge_vertex_idx, edge_vertex_blk, cell_edge_idx, cell_edge_blk, vert_edge_idx, vert_edge_blk, tangent_orientation, inv_primal_edge_length, inv_dual_edge_length, geofac_div, @@ -152,6 +173,5 @@ void nabla2_vec_atmos_lib_sp(const float* vec_e, const int* edge_cell_idx, const i_endidx_c, i_startblk_v, i_endblk_v, i_startidx_v, i_endidx_v, i_startblk, i_endblk, i_startidx_in, i_endidx_in, nlev, nblks_c, nblks_v, nblks_e, slev, elev, nproma, cell_type, lacc); - - } + diff --git a/src/horizontal/mo_lib_laplace_bindings.h b/src/horizontal/mo_lib_laplace_bindings.h index 49f3c4b..9329a52 100644 --- a/src/horizontal/mo_lib_laplace_bindings.h +++ b/src/horizontal/mo_lib_laplace_bindings.h @@ -46,7 +46,7 @@ void nabla2_vec_atmos_lib_dp(const double* vec_e, const int* edge_cell_idx, cons const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk, const int* vert_edge_idx, const int* vert_edge_blk, const double* tangent_orientation, const double* inv_primal_edge_length, const double* inv_dual_edge_length, const double* geofac_div, - const double* geofac_rot, double* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c, + const double* geofac_rot, const double* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c, int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c, int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc); @@ -56,7 +56,7 @@ void nabla2_vec_atmos_lib_sp(const float* vec_e, const int* edge_cell_idx, const const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk, const int* vert_edge_idx, const int* vert_edge_blk, const float* tangent_orientation, const float* inv_primal_edge_length, const float* inv_dual_edge_length, const float* geofac_div, - const float* geofac_rot, float* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c, + const float* geofac_rot, const float* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c, int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c, int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc); diff --git a/test/c/test_mo_lib_laplace.cpp b/test/c/test_mo_lib_laplace.cpp index 0e25792..ad482b6 100644 --- a/test/c/test_mo_lib_laplace.cpp +++ b/test/c/test_mo_lib_laplace.cpp @@ -59,11 +59,21 @@ template<typename T> int i_endblk = 1; int i_startidx_in = 0; int i_endidx_in = nproma-1 ; + int i_startblk_e = 0 ; int i_endblk_e = 0 ; int i_startidx_e = 0; int i_endidx_e = 0 ; + int i_startblk_c = 0; + int i_endblk_c = 0 ; + int i_startidx_c = 0; + int i_endidx_c = 0; + int i_startblk_v = 0; + int i_endblk_v = 0; + int i_startidx_v =0; + int i_endidx_v = 0; + //nabla2_scalar_avg_lib_sp specifics std::vector<int> i_startblk_in{}; std::vector<int> i_endblk_in{}; @@ -71,7 +81,9 @@ template<typename T> std::vector<int> i_endidx_array{}; int patch_id = 0; const bool l_limited_area = false; - // + + + int nlev = 4; int slev = 0 ; @@ -82,20 +94,35 @@ template<typename T> //Arrays for nabla2_scalar_lib - std::vector<InType> psi_c; //Dimension (nproma,nlev,nblks_c) - std::vector<int> cell_neighbor_idx; //Dimenaion (nproma,nblks_c,3) - std::vector<int> cell_neighbor_blk; //Dimension (nproma,nblks_c,3) - std::vector<int> edge_cell_idx; //Dimension (nproma,nblks_e,2) - std::vector<int> edge_cell_blk; //Dimension (nproma,nblks_e,2) - std::vector<InType> inv_dual_edge_length; //Dimension (nproma,nblks_e) - std::vector<int> cell_edge_idx; //Dimension (nproma,nblks_c,3) - std::vector<int> cell_edge_blk; //Dimension (nproma,nblks_c,3) - std::vector<InType> geofac_n2s; // Dimension (nproma,cell_type+1,nblks_c) - std::vector<InType> geofac_div; //Dimension (nproma,cell_type,nblks_c) - std::vector<OutType> nabla2_psi_c; //Dimension (nproma,nlev,nblks_c) + std::vector<InType> psi_c{}; //Dimension (nproma,nlev,nblks_c) + std::vector<int> cell_neighbor_idx{}; //Dimenaion (nproma,nblks_c,3) + std::vector<int> cell_neighbor_blk{}; //Dimension (nproma,nblks_c,3) + std::vector<int> edge_cell_idx{}; //Dimension (nproma,nblks_e,2) + std::vector<int> edge_cell_blk{}; //Dimension (nproma,nblks_e,2) + std::vector<InType> inv_dual_edge_length{}; //Dimension (nproma,nblks_e) + std::vector<int> cell_edge_idx{}; //Dimension (nproma,nblks_c,3) + std::vector<int> cell_edge_blk{}; //Dimension (nproma,nblks_c,3) + std::vector<InType> geofac_n2s{}; // Dimension (nproma,cell_type+1,nblks_c) + std::vector<InType> geofac_div{}; //Dimension (nproma,cell_type,nblks_c) + std::vector<OutType> nabla2_psi_c{}; //Dimension (nproma,nlev,nblks_c) //Arrays for nabla2_scalar_avg_lib_sp - std::vector<InType> avg_coeff; + std::vector<InType> avg_coeff{}; + + //Arrays for nabla2_vec_atmos_lib + std::vector<InType> vec_e{}; //Dimension (nproma,nlev,nblks_e) + std::vector<int> edge_vertex_idx{}; //Dimension (nproma,nblks_e,4) + std::vector<int> edge_vertex_blk{}; //Dimension (nproma,nblks_e,4) + std::vector<int> vert_edge_idx{}; //Dimension (nproma,nblks_v,6) + std::vector<int> vert_edge_blk{}; //Dimension (nproma,nblks_v,6) + std::vector<InType> tangent_orientation{}; //Dimension (nproma,nblks_e) + std::vector<InType> inv_primal_edge_length{};//Dimension (nproma,nblks_e) + + std::vector<InType> geofac_rot{}; //Dimension (nproma,9-cell_type,nblks_v) + std::vector<OutType> nabla2_vec_e{}; //Dimension (nproma,nlev,nblks_e) + + + MoLibLaplaceMixedTestFixture() { @@ -111,6 +138,17 @@ template<typename T> edge_cell_blk.resize(num_elements_3d<int>(nproma,nblks_e,2), 0); cell_edge_idx.resize(num_elements_3d<int>(nproma,nblks_c,3), 1); cell_edge_blk.resize(num_elements_3d<int>(nproma,nblks_c,3), 0); + + + vec_e.resize(num_elements_3d<InType>(nproma,nlev,nblks_e), static_cast<InType>(1)); + tangent_orientation.resize(num_elements_2d<InType>(nproma,nblks_e), static_cast<InType>(1)); + inv_primal_edge_length.resize(num_elements_2d<InType>(nproma,nblks_e), static_cast<InType>(1)); + geofac_rot.resize(num_elements_3d<InType>(nproma,9-cell_type,nblks_v), static_cast<InType>(1)); + nabla2_vec_e.resize(num_elements_3d<OutType>(nproma,nlev,nblks_e), static_cast<InType>(0)); + edge_vertex_idx.resize(num_elements_3d<InType>(nproma,nblks_e,4), 1); + edge_vertex_blk.resize(num_elements_3d<InType>(nproma,nblks_e,4), 0); + vert_edge_idx.resize(num_elements_3d<InType>(nproma,nblks_v,6), 1); + vert_edge_blk.resize(num_elements_3d<InType>(nproma,nblks_v,6), 0); //Allocating nabla2_scalar_avg_lib_sp specifics i_startblk_in.resize(3, i_startblk); @@ -181,13 +219,34 @@ nabla2_scalar_avg_lib<InType, OutType>( } +TYPED_TEST(MoLibLaplaceMixedTestFixture, nabla2_vec_atmos_lib) { +using InType = typename TestFixture::InType; +using OutType = typename TestFixture::OutType; +nabla2_vec_atmos_lib<InType, OutType>( + this->vec_e.data(), this->edge_cell_idx.data(), + this->edge_cell_blk.data(), this->edge_vertex_idx.data(), + this->edge_vertex_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->vert_edge_idx.data(), + this->vert_edge_blk.data(), this->tangent_orientation.data(), + this->inv_primal_edge_length.data(), this->inv_dual_edge_length.data(), + this->geofac_div.data(), this->geofac_rot.data(), this->nabla2_vec_e.data(), + this->i_startblk_c, this->i_endblk_c, this->i_startidx_c, this->i_endidx_c, + this->i_startblk_v, this->i_endblk_v, this->i_startidx_v, this->i_endidx_v, + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->nlev, this->nblks_c, this->nblks_v, this-> nblks_e, this->slev, this->elev, + this->nproma, this->cell_type, this->lacc); + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev ; level < this->elev; ++level) { + for (int i = this->i_startidx_in ; i < this->i_endidx_in; ++i) { - - - - - + size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; + EXPECT_NEAR(this->nabla2_vec_e[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + } + } + } +} -- GitLab