From 3a9361dc368018d6c2e2911dd2b8781d9eb0b205 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante5.lvt.dkrz.de> Date: Mon, 24 Feb 2025 14:21:48 +0100 Subject: [PATCH 01/33] Added bindings for rbf --- .../mo_lib_intp_rbf_bindings.cpp | 32 +++++++++++++++++++ src/interpolation/mo_lib_intp_rbf_bindings.h | 22 +++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 src/interpolation/mo_lib_intp_rbf_bindings.cpp create mode 100644 src/interpolation/mo_lib_intp_rbf_bindings.h diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.cpp b/src/interpolation/mo_lib_intp_rbf_bindings.cpp new file mode 100644 index 0000000..281a89f --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf_bindings.cpp @@ -0,0 +1,32 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include mo_lib_intp_rbf_bindings.h +#include mo_lib_intp_rbf.hpp + + +void rbf_interpol_c2grad_lib(const wp* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk + const wp* rbf_c2grad_coeff, const wp* grad_x, const wp* grad_y, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, bool lacc){ + + +rbf_interpol_c2grad_lib<wp>(p_cell_in, rbf_c2grad_idx, + rbf_c2grad_blk, rbf_c2grad_coeff, + grad_x, grad_y, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, slev, elev, + nproma, lacc); + + +} + + + diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.h b/src/interpolation/mo_lib_intp_rbf_bindings.h new file mode 100644 index 0000000..826fb7e --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf_bindings.h @@ -0,0 +1,22 @@ +// ICON +// +// --------------------------------------------------------------- +// 
Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- +#pragma once + +#ifdef __SINGLE_PRECISION + using wp = single; +#else + using wp = double; +#endif + +void rbf_interpol_c2grad_lib(const wp* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, + const wp* rbf_c2grad_coeff, const wp* grad_x, const wp* grad_y, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, bool lacc); -- GitLab From 63e4efcf558af3b5d08ecc7254b258c099036d8f Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante5.lvt.dkrz.de> Date: Mon, 24 Feb 2025 14:42:23 +0100 Subject: [PATCH 02/33] added Views for c2grad_lib --- src/interpolation/mo_lib_intp_rbf.cpp | 31 +++++++++++++++++++++++++++ src/interpolation/mo_lib_intp_rbf.hpp | 21 ++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 src/interpolation/mo_lib_intp_rbf.cpp create mode 100644 src/interpolation/mo_lib_intp_rbf.hpp diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp new file mode 100644 index 0000000..c74503e --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf.cpp @@ -0,0 +1,31 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#inlcude mo_lib_intp_rbf.hpp + + +void rbf_interpol_c2grad_lib(const wp* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2gra + const wp* rbf_c2grad_coeff, const wp* grad_x, const wp* grad_y, + int 
i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, bool lacc){ + + //aliases for unmanaged Kokkos views + typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + + //to avoid memory ownership issues + UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c); + UnmanagedConstT3D grad_x_view(grad_x, nproma, nlev, nblks_c); + UnmanagedConstT3D grad_y_view(grad_y, nproma, nlev, nblks_c); + UnmanagedConstInt3D rbf_c2grad_idx_view(rbf_c2grad_idx, rbf_c2grad_dim, nproma, nblks_c); + UnmanagedConstInt3D rbf_c2grad_blk_view(rbf_c2grad_blk, rbf_c2grad_dim, nproma, nblks_c); +} diff --git a/src/interpolation/mo_lib_intp_rbf.hpp b/src/interpolation/mo_lib_intp_rbf.hpp new file mode 100644 index 0000000..5fbf68b --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf.hpp @@ -0,0 +1,21 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- +#pragma once + +#include "mo_lib_loopindices.hpp" +#include <Kokkos_Core.hpp> +#include <vector> + +template <typename T> +void rbf_interpol_c2grad_lib(const wp* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2gra + const wp* rbf_c2grad_coeff, const wp* grad_x, const wp* grad_y, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, bool lacc); -- GitLab From c1a0ccffe73b60020a51ad83502783d03a437a43 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante5.lvt.dkrz.de> Date: 
Mon, 24 Feb 2025 14:44:57 +0100 Subject: [PATCH 03/33] fixed typo! --- src/interpolation/mo_lib_intp_rbf.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp index c74503e..795d343 100644 --- a/src/interpolation/mo_lib_intp_rbf.cpp +++ b/src/interpolation/mo_lib_intp_rbf.cpp @@ -9,7 +9,7 @@ // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- -#inlcude mo_lib_intp_rbf.hpp +#include mo_lib_intp_rbf.hpp void rbf_interpol_c2grad_lib(const wp* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2gra -- GitLab From 016db45178c908b4e57f27e3950231ac0cf97143 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante3.lvt.dkrz.de> Date: Mon, 24 Feb 2025 15:22:30 +0100 Subject: [PATCH 04/33] added binding file into cmake --- src/interpolation/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index 1051516..f1dc7b9 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -19,7 +19,7 @@ add_library( mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp interpolation_bindings.cpp -) + mo_lib_intp_rbf_bindings.cpp) add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation) -- GitLab From 4c640a3a852321ca55f18c9f3d2bd9f882e05b30 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante5.lvt.dkrz.de> Date: Mon, 24 Feb 2025 15:32:30 +0100 Subject: [PATCH 05/33] Fixed typo --- src/interpolation/mo_lib_intp_rbf_bindings.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.cpp b/src/interpolation/mo_lib_intp_rbf_bindings.cpp index 281a89f..8bb0feb 100644 --- a/src/interpolation/mo_lib_intp_rbf_bindings.cpp +++ b/src/interpolation/mo_lib_intp_rbf_bindings.cpp @@ -9,8 +9,8 @@ // 
SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- -#include mo_lib_intp_rbf_bindings.h -#include mo_lib_intp_rbf.hpp +#include "mo_lib_intp_rbf_bindings.h" +#include "mo_lib_intp_rbf.hpp" void rbf_interpol_c2grad_lib(const wp* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk -- GitLab From 3fe938837b825e1d47d5c203c3131cb9d0c1b6a0 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante5.lvt.dkrz.de> Date: Mon, 24 Feb 2025 23:00:08 +0100 Subject: [PATCH 06/33] Ported rbf_interpol_c2grad_lib --- src/interpolation/CMakeLists.txt | 3 +- src/interpolation/mo_lib_intp_rbf.cpp | 66 ++++++++++++++++--- src/interpolation/mo_lib_intp_rbf.hpp | 6 +- .../mo_lib_intp_rbf_bindings.cpp | 25 +++++-- src/interpolation/mo_lib_intp_rbf_bindings.h | 18 ++--- 5 files changed, 91 insertions(+), 27 deletions(-) diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index f1dc7b9..d5af767 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -19,7 +19,8 @@ add_library( mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp interpolation_bindings.cpp - mo_lib_intp_rbf_bindings.cpp) + mo_lib_intp_rbf_bindings.cpp + mo_lib_intp_rbf.cpp) add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation) diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp index 795d343..666e02c 100644 --- a/src/interpolation/mo_lib_intp_rbf.cpp +++ b/src/interpolation/mo_lib_intp_rbf.cpp @@ -9,23 +9,73 @@ // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- -#include mo_lib_intp_rbf.hpp +#include "mo_lib_intp_rbf.hpp" +#include <Kokkos_Core.hpp> - -void rbf_interpol_c2grad_lib(const wp* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2gra - const wp* rbf_c2grad_coeff, const wp* grad_x, const wp* grad_y, +template <typename T> 
+void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, + const T* rbf_c2grad_coeff, T* grad_x, T* grad_y, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, bool lacc){ + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc){ //aliases for unmanaged Kokkos views typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + typedef Kokkos::View<const T****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; //to avoid memory ownership issues UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c); - UnmanagedConstT3D grad_x_view(grad_x, nproma, nlev, nblks_c); - UnmanagedConstT3D grad_y_view(grad_y, nproma, nlev, nblks_c); + UnmanagedT3D grad_x_view(grad_x, nproma, nlev, nblks_c); + UnmanagedT3D grad_y_view(grad_y, nproma, nlev, nblks_c); UnmanagedConstInt3D rbf_c2grad_idx_view(rbf_c2grad_idx, rbf_c2grad_dim, nproma, nblks_c); UnmanagedConstInt3D rbf_c2grad_blk_view(rbf_c2grad_blk, rbf_c2grad_dim, nproma, nblks_c); -} + UnmanagedConstT4D rbf_c2grad_coeff_view(rbf_c2grad_coeff, rbf_c2grad_dim, 2, nproma, nblks_c); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::parallel_for("rbf_interpol_c2grad", + Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}), + KOKKOS_LAMBDA(const int jk, const int jc){ + + grad_x_view(jc, jk, jb) = + rbf_c2grad_coeff_view(0, 1, jc, jb)* + p_cell_in_view(jc, jk, jb) + + rbf_c2grad_coeff_view(1, 1, jc, jb)* + p_cell_in_view(rbf_c2grad_idx_view(1, jc, jb), jk, rbf_c2grad_blk_view(1, jc, jb)) + + 
rbf_c2grad_coeff_view(2, 1, jc, jb)* + p_cell_in_view(rbf_c2grad_idx_view(2, jc, jb), jk, rbf_c2grad_blk_view(2, jc, jb)) + + rbf_c2grad_coeff_view(3, 1, jc, jb)* + p_cell_in_view(rbf_c2grad_idx_view(3, jc, jb), jk, rbf_c2grad_blk_view(3, jc, jb)) + + rbf_c2grad_coeff_view(4, 1, jc, jb)* + p_cell_in_view(rbf_c2grad_idx_view(4, jc, jb), jk, rbf_c2grad_blk_view(4, jc, jb)) + + rbf_c2grad_coeff_view(5, 1, jc, jb)* + p_cell_in_view(rbf_c2grad_idx_view(5, jc, jb), jk, rbf_c2grad_blk_view(5, jc, jb)) + + rbf_c2grad_coeff_view(6, 1, jc, jb)* + p_cell_in_view(rbf_c2grad_idx_view(6, jc, jb), jk, rbf_c2grad_blk_view(6, jc, jb)) + + rbf_c2grad_coeff_view(7, 1, jc, jb)* + p_cell_in_view(rbf_c2grad_idx_view(7, jc, jb), jk, rbf_c2grad_blk_view(7, jc, jb)) + + rbf_c2grad_coeff_view(8, 1, jc, jb)* + p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk, rbf_c2grad_blk_view(8, jc, jb)) + + rbf_c2grad_coeff_view(9, 1, jc, jb)* + p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk, rbf_c2grad_blk_view(9, jc, jb)); + + }); + + }//for +}//void + +template +void rbf_interpol_c2grad_lib<double>(const double* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, + const double* rbf_c2grad_coeff, double* grad_x, double* grad_y, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); + +template +void rbf_interpol_c2grad_lib<float>(const float* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, + const float* rbf_c2grad_coeff, float* grad_x, float* grad_y, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); diff --git a/src/interpolation/mo_lib_intp_rbf.hpp b/src/interpolation/mo_lib_intp_rbf.hpp index 5fbf68b..8006a43 100644 --- a/src/interpolation/mo_lib_intp_rbf.hpp +++ b/src/interpolation/mo_lib_intp_rbf.hpp @@ -15,7 +15,7 @@ #include <vector> template <typename T> 
-void rbf_interpol_c2grad_lib(const wp* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2gra - const wp* rbf_c2grad_coeff, const wp* grad_x, const wp* grad_y, +void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, + const T* rbf_c2grad_coeff, T* grad_x, T* grad_y, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, bool lacc); + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.cpp b/src/interpolation/mo_lib_intp_rbf_bindings.cpp index 8bb0feb..a44a3da 100644 --- a/src/interpolation/mo_lib_intp_rbf_bindings.cpp +++ b/src/interpolation/mo_lib_intp_rbf_bindings.cpp @@ -9,24 +9,35 @@ // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- + #include "mo_lib_intp_rbf_bindings.h" #include "mo_lib_intp_rbf.hpp" -void rbf_interpol_c2grad_lib(const wp* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk - const wp* rbf_c2grad_coeff, const wp* grad_x, const wp* grad_y, +void rbf_interpol_c2grad_lib_sp(const float* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, + const float* rbf_c2grad_coeff, float* grad_x, float* grad_y, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, bool lacc){ + int slev, int elev, int nproma, int nlev, int nblk_c, int rbf_c2grad_dim, bool lacc){ -rbf_interpol_c2grad_lib<wp>(p_cell_in, rbf_c2grad_idx, - rbf_c2grad_blk, rbf_c2grad_coeff, +rbf_interpol_c2grad_lib<float>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, + rbf_c2grad_coeff, grad_x, grad_y, i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, - nproma, lacc); + nproma, nlev, nblk_c, rbf_c2grad_dim, lacc); +} -} +void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, + const double* rbf_c2grad_coeff, double* grad_x, 
double* grad_y, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblk_c, int rbf_c2grad_dim, bool lacc){ +rbf_interpol_c2grad_lib<double>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, + rbf_c2grad_coeff, + grad_x, grad_y, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, slev, elev, + nproma, nlev, nblk_c, rbf_c2grad_dim, lacc); +} diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.h b/src/interpolation/mo_lib_intp_rbf_bindings.h index 826fb7e..e7ef8f3 100644 --- a/src/interpolation/mo_lib_intp_rbf_bindings.h +++ b/src/interpolation/mo_lib_intp_rbf_bindings.h @@ -10,13 +10,15 @@ // --------------------------------------------------------------- #pragma once -#ifdef __SINGLE_PRECISION - using wp = single; -#else - using wp = double; -#endif +extern "C" { -void rbf_interpol_c2grad_lib(const wp* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, - const wp* rbf_c2grad_coeff, const wp* grad_x, const wp* grad_y, +void rbf_interpol_c2grad_lib_sp(const float* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, + const float* rbf_c2grad_coeff, float* grad_x, const float* grad_y, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, bool lacc); + int slev, int elev, int nproma, int nlev, int nblk_c, int rbf_c2grad_dim, bool lacc); + +void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, + const double* rbf_c2grad_coeff, double* grad_x, const double* grad_y, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblk_c, int rbf_c2grad_dim, bool lacc); +} -- GitLab From 19ab5976de9b83a3ac37333701826e0203851f8a Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante4.lvt.dkrz.de> Date: Mon, 24 Feb 2025 23:24:44 +0100 Subject: [PATCH 07/33] Added grad_y --- src/interpolation/mo_lib_intp_rbf.cpp | 25 
+++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp index 666e02c..0b0e213 100644 --- a/src/interpolation/mo_lib_intp_rbf.cpp +++ b/src/interpolation/mo_lib_intp_rbf.cpp @@ -62,8 +62,29 @@ void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, cons p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk, rbf_c2grad_blk_view(8, jc, jb)) + rbf_c2grad_coeff_view(9, 1, jc, jb)* p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk, rbf_c2grad_blk_view(9, jc, jb)); - - }); + + grad_y_view(jc, jk, jb) = + rbf_c2grad_coeff_view(0, 2, jc, jb)* + p_cell_in_view(jc, jk, jb) + + rbf_c2grad_coeff_view(1, 2, jc, jb)* + p_cell_in_view(rbf_c2grad_idx_view(1, jc, jb), jk, rbf_c2grad_blk_view(1, jc, jb)) + + rbf_c2grad_coeff_view(2, 2, jc, jb)* + p_cell_in_view(rbf_c2grad_idx_view(2, jc, jb), jk, rbf_c2grad_blk_view(2, jc, jb)) + + rbf_c2grad_coeff_view(3, 2, jc, jb)* + p_cell_in_view(rbf_c2grad_idx_view(3, jc, jb), jk, rbf_c2grad_blk_view(3, jc, jb)) + + rbf_c2grad_coeff_view(4, 2, jc, jb)* + p_cell_in_view(rbf_c2grad_idx_view(4, jc, jb), jk, rbf_c2grad_blk_view(4, jc, jb)) + + rbf_c2grad_coeff_view(5, 2, jc, jb)* + p_cell_in_view(rbf_c2grad_idx_view(5, jc, jb), jk, rbf_c2grad_blk_view(5, jc, jb)) + + rbf_c2grad_coeff_view(6, 2, jc, jb)* + p_cell_in_view(rbf_c2grad_idx_view(6, jc, jb), jk, rbf_c2grad_blk_view(6, jc, jb)) + + rbf_c2grad_coeff_view(7, 2, jc, jb)* + p_cell_in_view(rbf_c2grad_idx_view(7, jc, jb), jk, rbf_c2grad_blk_view(7, jc, jb)) + + rbf_c2grad_coeff_view(8, 2, jc, jb)* + p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk, rbf_c2grad_blk_view(8, jc, jb)) + + rbf_c2grad_coeff_view(9, 2, jc, jb)* + p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk, rbf_c2grad_blk_view(9, jc, jb)); + }); }//for }//void -- GitLab From 54ac88915fd7476b3e3f81713b6d97cd586e144b Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante0.lvt.dkrz.de> Date: Tue, 25 Feb 
2025 10:30:28 +0100 Subject: [PATCH 08/33] Ported rbf_vec_interpol_cell_lib --- src/interpolation/mo_lib_intp_rbf.cpp | 86 +++++++++++++++++++ src/interpolation/mo_lib_intp_rbf.hpp | 7 ++ .../mo_lib_intp_rbf_bindings.cpp | 45 +++++++--- src/interpolation/mo_lib_intp_rbf_bindings.h | 16 +++- 4 files changed, 139 insertions(+), 15 deletions(-) diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp index 0b0e213..fa9f372 100644 --- a/src/interpolation/mo_lib_intp_rbf.cpp +++ b/src/interpolation/mo_lib_intp_rbf.cpp @@ -100,3 +100,89 @@ void rbf_interpol_c2grad_lib<float>(const float* p_cell_in, const int* rbf_c2gra const float* rbf_c2grad_coeff, float* grad_x, float* grad_y, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); + + +//------------------------------------------rbf_vec_interpol_cell_lib--------------------------------------------- + +template <typename T> +void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, + const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async){ + + typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + typedef Kokkos::View<const T****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; + + UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e); + UnmanagedConstInt3D rbf_vec_idx_c_view(rbf_vec_idx_c, rbf_vec_dim_c, nproma, nblk_c); + UnmanagedConstInt3D rbf_vec_blk_c_view(rbf_vec_blk_c, rbf_vec_dim_c, nproma, nblk_c); + 
UnmanagedConstT4D rbf_vec_coeff_c_view(rbf_vec_coeff_c, nproma, nblk_c); //TODO + UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblk_c); + UnmanagedT3D p_v_out_view(p_u_out, nproma, nlev, nblk_c); + + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::parallel_for("rbf_vec_interpol_cell_lib", + Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}), + KOKKOS_LAMBDA(const int jk, const int jc){ + + p_u_out_view(jc, jk, jb) = + rbf_vec_coeff_c_view(0, 1, jc, jb)* + p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk, rbf_vec_blk_c_view(0, jc, jb)) + + rbf_vec_coeff_c_view(1, 1, jc, jb)* + p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk, rbf_vec_blk_c_view(1, jc, jb)) + + rbf_vec_coeff_c_view(2, 1, jc, jb)* + p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk, rbf_vec_blk_c_view(2, jc, jb)) + + rbf_vec_coeff_c_view(3, 1, jc, jb)* + p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk, rbf_vec_blk_c_view(3, jc, jb)) + + rbf_vec_coeff_c_view(4, 1, jc, jb)* + p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk, rbf_vec_blk_c_view(4, jc, jb)) + + rbf_vec_coeff_c_view(5, 1, jc, jb)* + p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk, rbf_vec_blk_c_view(5, jc, jb)) + + rbf_vec_coeff_c_view(6, 1, jc, jb)* + p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk, rbf_vec_blk_c_view(6, jc, jb)) + + rbf_vec_coeff_c_view(7, 1, jc, jb)* + p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk, rbf_vec_blk_c_view(7, jc, jb)) + + rbf_vec_coeff_c_view(8, 1, jc, jb)* + p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, rbf_vec_blk_c_view(8, jc, jb)); + + p_v_out_view(jc, jk, jb) = + rbf_vec_coeff_c_view(0, 2, jc, jb)* + p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk, rbf_vec_blk_c_view(0, jc, jb)) + + rbf_vec_coeff_c_view(1, 2, jc, jb)* + p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk, rbf_vec_blk_c_view(1, jc, jb)) + + rbf_vec_coeff_c_view(2, 2, jc, jb)* 
+ p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk, rbf_vec_blk_c_view(2, jc, jb)) + + rbf_vec_coeff_c_view(3, 2, jc, jb)* + p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk, rbf_vec_blk_c_view(3, jc, jb)) + + rbf_vec_coeff_c_view(4, 2, jc, jb)* + p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk, rbf_vec_blk_c_view(4, jc, jb)) + + rbf_vec_coeff_c_view(5, 2, jc, jb)* + p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk, rbf_vec_blk_c_view(5, jc, jb)) + + rbf_vec_coeff_c_view(6, 2, jc, jb)* + p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk, rbf_vec_blk_c_view(6, jc, jb)) + + rbf_vec_coeff_c_view(7, 2, jc, jb)* + p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk, rbf_vec_blk_c_view(7, jc, jb)) + + rbf_vec_coeff_c_view(8, 2, jc, jb)* + p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, rbf_vec_blk_c_view(8, jc, jb)); + }); + Kokkos::fence(); + }//for +}//void + +template +void rbf_vec_interpol_cell_lib(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, + const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async); +template +void rbf_vec_interpol_cell_lib(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, + const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async); diff --git a/src/interpolation/mo_lib_intp_rbf.hpp b/src/interpolation/mo_lib_intp_rbf.hpp index 8006a43..520e9c6 100644 --- a/src/interpolation/mo_lib_intp_rbf.hpp +++ b/src/interpolation/mo_lib_intp_rbf.hpp @@ -19,3 +19,10 @@ void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, cons const T* rbf_c2grad_coeff, T* grad_x, T* grad_y, int i_startblk, int i_endblk, int i_startidx_in, int 
i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); + + +template <typename T> +void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, + const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async); diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.cpp b/src/interpolation/mo_lib_intp_rbf_bindings.cpp index a44a3da..b19b28c 100644 --- a/src/interpolation/mo_lib_intp_rbf_bindings.cpp +++ b/src/interpolation/mo_lib_intp_rbf_bindings.cpp @@ -17,27 +17,46 @@ void rbf_interpol_c2grad_lib_sp(const float* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, const float* rbf_c2grad_coeff, float* grad_x, float* grad_y, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblk_c, int rbf_c2grad_dim, bool lacc){ + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){ -rbf_interpol_c2grad_lib<float>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, - rbf_c2grad_coeff, - grad_x, grad_y, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, slev, elev, - nproma, nlev, nblk_c, rbf_c2grad_dim, lacc); + rbf_interpol_c2grad_lib<float>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, + rbf_c2grad_coeff, + grad_x, grad_y, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, slev, elev, + nproma, rbf_c2grad_dim, nlev, nblk_c, lacc); } - void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, const double* rbf_c2grad_coeff, double* grad_x, double* grad_y, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblk_c, int rbf_c2grad_dim, bool lacc){ + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, 
bool lacc){ + + + rbf_interpol_c2grad_lib<double>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, + rbf_c2grad_coeff, + grad_x, grad_y, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, slev, elev, + nproma, rbf_c2grad_dim, nlev, nblk_c, lacc); +} +void rbf_vec_interpol_cell_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, + const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, int rbf_vec_dim_c, bool lacc, bool acc_async){ -rbf_interpol_c2grad_lib<double>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, - rbf_c2grad_coeff, - grad_x, grad_y, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, slev, elev, - nproma, nlev, nblk_c, rbf_c2grad_dim, lacc); + rbf_vec_interpol_cell_lib<float>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, + rbf_vec_coeff_c, p_u_out, p_v_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, + slev, elev, nproma, nlev, nblk_c, nblk_e, rbf_vec_dim_c, lacc, acc_async); } +void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, + const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, int rbf_vec_dim_c, bool lacc, bool acc_async){ + + rbf_vec_interpol_cell_lib<double>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c, + p_u_out, p_v_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in, + slev, elev, nproma, nlev, nblk_c, nblk_e, rbf_vec_dim_c, lacc, acc_async); +} diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.h b/src/interpolation/mo_lib_intp_rbf_bindings.h index e7ef8f3..a9775fa 100644 --- a/src/interpolation/mo_lib_intp_rbf_bindings.h +++ b/src/interpolation/mo_lib_intp_rbf_bindings.h @@ -15,10 +15,22 @@ extern "C" { void rbf_interpol_c2grad_lib_sp(const float* p_cell_in, 
const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, const float* rbf_c2grad_coeff, float* grad_x, const float* grad_y, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblk_c, int rbf_c2grad_dim, bool lacc); + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, const double* rbf_c2grad_coeff, double* grad_x, const double* grad_y, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblk_c, int rbf_c2grad_dim, bool lacc); + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); + +void rbf_vec_interpol_cell_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, + const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, int rbf_vec_dim_c, bool lacc, bool acc_async); + +void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, + const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, int rbf_vec_dim_c, bool lacc, bool acc_async); + + } -- GitLab From eac9e262eb66b25397b283b7e5e3efa5682d3a1c Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante0.lvt.dkrz.de> Date: Tue, 25 Feb 2025 11:32:42 +0100 Subject: [PATCH 09/33] Ported rbf_vec_interpol_edge_lib --- src/interpolation/mo_lib_intp_rbf.cpp | 59 ++++++++++++++++++- src/interpolation/mo_lib_intp_rbf.hpp | 7 +++ .../mo_lib_intp_rbf_bindings.cpp | 25 ++++++++ src/interpolation/mo_lib_intp_rbf_bindings.h | 12 ++++ 4 files changed, 101 insertions(+), 2 deletions(-) 
diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp index fa9f372..f125f1f 100644 --- a/src/interpolation/mo_lib_intp_rbf.cpp +++ b/src/interpolation/mo_lib_intp_rbf.cpp @@ -177,12 +177,67 @@ void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const }//void template -void rbf_vec_interpol_cell_lib(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, +void rbf_vec_interpol_cell_lib<double>(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async); template -void rbf_vec_interpol_cell_lib(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, +void rbf_vec_interpol_cell_lib<float>(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async); + +//------------------------------------------rbf_vec_interpol_edge_lib--------------------------------------------- + +template <typename T> +void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, + const T* rbf_vec_coeff_e, T* p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async){ + + typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> 
UnmanagedConstInt3D; + + UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma,nlev,nblks_e); + UnmanagedConstInt3D rbf_vec_idx_e_view(rbf_vec_idx_e, rbf_vec_dim_e,nproma,nblks_e); + UnmanagedConstInt3D rbf_vec_blk_e_view(rbf_vec_blk_e, rbf_vec_dim_e,nproma,nblks_e); + UnmanagedConstT3D rbf_vec_coeff_e_view(rbf_vec_coeff_e, rbf_vec_dim_e,nproma,nblks_e); + UnmanagedT3D p_vt_out_view(p_vt_out, nproma,nlev,nblks_e); + + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::parallel_for("rbf_vec_interpol_edge_lib", + Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}), + KOKKOS_LAMBDA(const int jk, const int je){ + + p_vt_out_view(je, jk, jb) = + rbf_vec_coeff_e_view(1, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(1, je, jb), jk, rbf_vec_blk_e_view(1, je, jb)) + + rbf_vec_coeff_e_view(2, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(2, je, jb), jk, rbf_vec_blk_e_view(2, je, jb)) + + rbf_vec_coeff_e_view(3, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(3, je, jb), jk, rbf_vec_blk_e_view(3, je, jb)) + + rbf_vec_coeff_e_view(4, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(4, je, jb), jk, rbf_vec_blk_e_view(4, je, jb)); + + }); + } +} + +template +void rbf_vec_interpol_edge_lib<double>(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, + const double* rbf_vec_coeff_e, double* p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async); + + +template +void rbf_vec_interpol_edge_lib<float>(const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, + const float* rbf_vec_coeff_e, float* p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async); + 
diff --git a/src/interpolation/mo_lib_intp_rbf.hpp b/src/interpolation/mo_lib_intp_rbf.hpp index 520e9c6..e52989d 100644 --- a/src/interpolation/mo_lib_intp_rbf.hpp +++ b/src/interpolation/mo_lib_intp_rbf.hpp @@ -26,3 +26,10 @@ void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async); + +template <typename T> +void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, + const T* rbf_vec_coeff_e, T* p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_asynci); diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.cpp b/src/interpolation/mo_lib_intp_rbf_bindings.cpp index b19b28c..5c7943b 100644 --- a/src/interpolation/mo_lib_intp_rbf_bindings.cpp +++ b/src/interpolation/mo_lib_intp_rbf_bindings.cpp @@ -60,3 +60,28 @@ void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_ p_u_out, p_v_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, nlev, nblk_c, nblk_e, rbf_vec_dim_c, lacc, acc_async); } + +void rbf_vec_interpol_edge_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, + const double* rbf_vec_coeff_e, double* p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async){ + + + rbf_vec_interpol_edge_lib<double>(p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e, + rbf_vec_coeff_e, p_vt_out, + i_startblk, i_endblk, i_startidx_in,i_endidx_in, + slev, elev, nlev, nproma, rbf_vec_dim_e, nblks_e, lacc, acc_async); +} + +void rbf_vec_interpol_edge_lib_sp(const float* p_vn_in, 
const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, + const float* rbf_vec_coeff_e, float* p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async){ + + rbf_vec_interpol_edge_lib<float>(p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e, + rbf_vec_coeff_e, p_vt_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, + slev, elev, nlev, nproma, rbf_vec_dim_e, nblks_e,lacc, acc_async); +} diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.h b/src/interpolation/mo_lib_intp_rbf_bindings.h index a9775fa..9620614 100644 --- a/src/interpolation/mo_lib_intp_rbf_bindings.h +++ b/src/interpolation/mo_lib_intp_rbf_bindings.h @@ -32,5 +32,17 @@ void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_ int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, int rbf_vec_dim_c, bool lacc, bool acc_async); +void rbf_vec_interpol_edge_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, + const double* rbf_vec_coeff_e, double* p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async); + +void rbf_vec_interpol_edge_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, + const float* rbf_vec_coeff_e, float* p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async); + } -- GitLab From 04a3601466fe73b9d396e60bdbd990d8c252e04a Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante0.lvt.dkrz.de> Date: Tue, 25 Feb 2025 11:56:15 +0100 Subject: [PATCH 10/33] Fixed index --- src/interpolation/mo_lib_intp_rbf.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
--git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp index f125f1f..a7d9cbd 100644 --- a/src/interpolation/mo_lib_intp_rbf.cpp +++ b/src/interpolation/mo_lib_intp_rbf.cpp @@ -217,10 +217,10 @@ void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const KOKKOS_LAMBDA(const int jk, const int je){ p_vt_out_view(je, jk, jb) = + rbf_vec_coeff_e_view(0, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(0, je, jb), jk, rbf_vec_blk_e_view(0, je, jb)) + rbf_vec_coeff_e_view(1, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(1, je, jb), jk, rbf_vec_blk_e_view(1, je, jb)) + rbf_vec_coeff_e_view(2, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(2, je, jb), jk, rbf_vec_blk_e_view(2, je, jb)) + - rbf_vec_coeff_e_view(3, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(3, je, jb), jk, rbf_vec_blk_e_view(3, je, jb)) + - rbf_vec_coeff_e_view(4, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(4, je, jb), jk, rbf_vec_blk_e_view(4, je, jb)); + rbf_vec_coeff_e_view(3, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(3, je, jb), jk, rbf_vec_blk_e_view(3, je, jb)); }); } -- GitLab From f54eaa77aed000133b4c8c689dc20b49c37a6291 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante0.lvt.dkrz.de> Date: Tue, 25 Feb 2025 16:03:59 +0100 Subject: [PATCH 11/33] ported mo_lib_laplace --- src/horizontal/CMakeLists.txt | 4 +- src/horizontal/mo_lib_laplace.cpp | 104 +++++++++++++++++++++ src/horizontal/mo_lib_laplace.hpp | 24 +++++ src/horizontal/mo_lib_laplace_bindings.cpp | 50 ++++++++++ src/horizontal/mo_lib_laplace_bindings.h | 32 +++++++ 5 files changed, 213 insertions(+), 1 deletion(-) create mode 100644 src/horizontal/mo_lib_laplace.cpp create mode 100644 src/horizontal/mo_lib_laplace.hpp create mode 100644 src/horizontal/mo_lib_laplace_bindings.cpp create mode 100644 src/horizontal/mo_lib_laplace_bindings.h diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt index 078a14d..44f9e44 100644 --- a/src/horizontal/CMakeLists.txt +++ 
b/src/horizontal/CMakeLists.txt @@ -13,7 +13,9 @@ add_library( iconmath-horizontal mo_lib_divrot.F90 mo_lib_laplace.F90 - mo_lib_gradients.F90) + mo_lib_gradients.F90 + mo_lib_laplace_bindings.cpp + mo_lib_laplace.cpp) add_library(${PROJECT_NAME}::horizontal ALIAS iconmath-horizontal) diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp new file mode 100644 index 0000000..48c4479 --- /dev/null +++ b/src/horizontal/mo_lib_laplace.cpp @@ -0,0 +1,104 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include "mo_lib_laplace.hpp" +//#include "mo_lib_gradients.hpp" +#include <Kokkos_Core.hpp> +#include <iostream> + +template<typename T> +void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const int* edge_cell_idx, const int* edge_cell_blk, const T* inv_dual_edge_length, + const int* cell_edge_idx, const int* cell_edge_blk, + const T* geofac_n2s, const T* geofac_div, T* nabla2_psi_c, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, + int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){ + + typedef Kokkos::View<const T**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT2D; + typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + + + UnmanagedConstT3D psi_c_view(psi_c, 
nproma,nlev,nblks_c); + UnmanagedConstInt3D cell_neighbor_idx_view(cell_neighbor_idx, nproma,nblks_c,3); + UnmanagedConstInt3D cell_neighbor_blk_view(cell_neighbor_blk, nproma,nblks_c,3); + UnmanagedConstInt3D edge_cell_idx_view(edge_cell_idx, nproma,nblks_e,2); + UnmanagedConstInt3D edge_cell_blk_view(edge_cell_blk, nproma,nblks_e,2); + UnmanagedConstT2D inv_dual_edge_length_view(inv_dual_edge_length, nproma,nblks_e); + UnmanagedConstInt3D cell_edge_idx_view(cell_edge_idx, nproma,nblks_c,3); + UnmanagedConstInt3D cell_edge_blk_view(cell_edge_blk, nproma,nblks_c,3); + UnmanagedConstT3D geofac_n2s_view(geofac_n2s, nproma,cell_type+1,nblks_c); + UnmanagedConstT3D geofac_div_view(geofac_div, nproma,cell_type,nblks_c); + UnmanagedT3D nabla2_psi_c_view(nabla2_psi_c, nproma,nlev,nblks_c); + + + switch (cell_type){ + + case 3: + for (int jb = i_startblk; jb < i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); + + + Kokkos::parallel_for("rbf_interpol_c2grad", + Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}), + KOKKOS_LAMBDA(const int jk, const int jc){ + + nabla2_psi_c_view(jc, jk, jb) = + psi_c_view(jc, jk, jb)*geofac_n2s_view(jc, 1, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * + geofac_n2s_view(jc, 2, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * + geofac_n2s_view(jc, 3, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 3), jk, cell_neighbor_blk_view(jc, jb, 3)) * + geofac_n2s_view(jc, 4, jb); + }); + } + break; + + + case 6: +/* TODO + grad_fd_norm_lib(psi_c, edge_cell_idx, edge_cell_blk, + inv_dual_edge_length, z_grad_fd_norm_e,& + i_startblk_e, i_endblk_e, i_startidx_e, i_endidx_e, + slev, elev, nproma); + + div_lib(z_grad_fd_norm_e, cell_edge_idx, cell_edge_blk, + geofac_div, nabla2_psi_c, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, 
slev, elev, nproma); +*/ + default: + std::cout << "Unknown value for cell_type\n"; + + }//switch +}//void + +template +void nabla2_scalar_lib<double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length, + const int* cell_edge_idx, const int* cell_edge_blk, + const double* geofac_n2s, const double* geofac_div, double* nabla2_psi_c, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, + int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); + +template +void nabla2_scalar_lib<float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length, + const int* cell_edge_idx, const int* cell_edge_blk, + const float* geofac_n2s, const float* geofac_div, float* nabla2_psi_c, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, + int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); diff --git a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp new file mode 100644 index 0000000..2a6663c --- /dev/null +++ b/src/horizontal/mo_lib_laplace.hpp @@ -0,0 +1,24 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- +#pragma once + +#include "mo_lib_loopindices.hpp" +#include <Kokkos_Core.hpp> +#include <vector> + +template<typename T> +void nabla2_scalar_lib(const 
T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const int* dge_cell_idx, const int* edge_cell_blk, const T* inv_dual_edge_length, + const int* cell_edge_idx, const int* cell_edge_blk, + const T* geofac_n2s, const T* geofac_div, const T* nabla2_psi_c, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, + int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); diff --git a/src/horizontal/mo_lib_laplace_bindings.cpp b/src/horizontal/mo_lib_laplace_bindings.cpp new file mode 100644 index 0000000..43a19b3 --- /dev/null +++ b/src/horizontal/mo_lib_laplace_bindings.cpp @@ -0,0 +1,50 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + + +#include "mo_lib_laplace_bindings.h" +#include "mo_lib_laplace.hpp" + + +void nabla2_scalar_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length, + const int* cell_edge_idx, const int* cell_edge_blk, + const double* geofac_n2s, const double* geofac_div, const double* nabla2_psi_c, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, + int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){ + + nabla2_scalar_lib<double>(psi_c, cell_neighbor_idx, cell_neighbor_blk, + edge_cell_idx, edge_cell_blk, inv_dual_edge_length, + cell_edge_idx, cell_edge_blk, + geofac_n2s, geofac_div, nabla2_psi_c, + i_startblk, i_endblk, 
i_startidx_in, i_endidx_in, + i_startblk_e, i_endblk_e, i_startidx_e, i_endidx_e, + nlev, slev, elev, nproma, nblks_e, nblks_c, cell_type, lacc); +} + + +void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length, + const int* cell_edge_idx, const int* cell_edge_blk, + const float* geofac_n2s, const float* geofac_div, const float* nabla2_psi_c, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, + int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){ + + nabla2_scalar_lib<float>(psi_c, cell_neighbor_idx, cell_neighbor_blk, + edge_cell_idx, edge_cell_blk, inv_dual_edge_length, + cell_edge_idx, cell_edge_blk, + geofac_n2s, geofac_div, nabla2_psi_c, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, + i_startblk_e, i_endblk_e, i_startidx_e, i_endidx_e, + nlev, slev, elev, nproma, nblks_e, nblks_c, cell_type, lacc); +} diff --git a/src/horizontal/mo_lib_laplace_bindings.h b/src/horizontal/mo_lib_laplace_bindings.h new file mode 100644 index 0000000..96bdc64 --- /dev/null +++ b/src/horizontal/mo_lib_laplace_bindings.h @@ -0,0 +1,32 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- +#pragma once + + + +extern "C"{ + +void nabla2_scalar_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length, + const int* cell_edge_idx, const int* cell_edge_blk, + const double* 
geofac_n2s, const double* geofac_div, const double* nabla2_psi_c, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, + int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); + +void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length, + const int* cell_edge_idx, const int* cell_edge_blk, + const float* geofac_n2s, const float* geofac_div, const float* nabla2_psi_c, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, + int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); +} -- GitLab From bd8a950b6fa2be57b0068ddd6744d28d6fabcaa0 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante0.lvt.dkrz.de> Date: Tue, 25 Feb 2025 16:28:59 +0100 Subject: [PATCH 12/33] Fixed index for nabla2_scalar_lib --- src/horizontal/mo_lib_laplace.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp index 48c4479..203b3b7 100644 --- a/src/horizontal/mo_lib_laplace.cpp +++ b/src/horizontal/mo_lib_laplace.cpp @@ -56,13 +56,13 @@ void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* KOKKOS_LAMBDA(const int jk, const int jc){ nabla2_psi_c_view(jc, jk, jb) = - psi_c_view(jc, jk, jb)*geofac_n2s_view(jc, 1, jb) + + psi_c_view(jc, jk, jb)*geofac_n2s_view(jc, 0, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * + geofac_n2s_view(jc, 1, jb) + psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * geofac_n2s_view(jc, 2, jb) + psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * - 
geofac_n2s_view(jc, 3, jb) + - psi_c_view(cell_neighbor_idx_view(jc, jb, 3), jk, cell_neighbor_blk_view(jc, jb, 3)) * - geofac_n2s_view(jc, 4, jb); + geofac_n2s_view(jc, 3, jb); }); } break; -- GitLab From 18b653dbc456a0061436df5e7a091b20a64592cd Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante4.lvt.dkrz.de> Date: Wed, 26 Feb 2025 10:04:06 +0100 Subject: [PATCH 13/33] WIP: MR seperation. --- src/horizontal/mo_lib_laplace.cpp | 118 ++++++++++++++++++++++++++++++ src/horizontal/mo_lib_laplace.hpp | 7 ++ 2 files changed, 125 insertions(+) diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp index 203b3b7..57ad292 100644 --- a/src/horizontal/mo_lib_laplace.cpp +++ b/src/horizontal/mo_lib_laplace.cpp @@ -11,8 +11,10 @@ #include "mo_lib_laplace.hpp" //#include "mo_lib_gradients.hpp" +//#inlcude "mo_fortran_tools.hpp" #include <Kokkos_Core.hpp> #include <iostream> +#include <utility> template<typename T> void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, @@ -102,3 +104,119 @@ void nabla2_scalar_lib<float>(const float* psi_c, const int* cell_neighbor_idx, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); + + +//--------------------------------nabla2_scalar_avg_lib------------------------------------- + + +template<typename T> +void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const T* geofac_n2s, const T* avg_coeff, T* nabla2_psi_c, + int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in, + int nblks_c, int cell_type, int patch_id, + int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc){ + + typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<T***, 
Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + + UnmanagedConstT3D psi_c_view(psi_c, nproma,nlev,nblks_c); + UnmanagedConstInt3D cell_neighbor_idx_view(cell_neighbor_idx, nproma,nblks_c,3); + UnmanagedConstInt3D cell_neighbor_blk_view(cell_neighbor_blk, nproma,nblks_c,3); + UnmanagedConstT3D geofac_n2s_view(geofac_n2s, nproma,cell_type+1,nblks_c); + UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma,nlev,nblks_c); + UnmanagedT3D nabla2_psi_c_view(nabla2_psi_c, nproma,nlev,nblks_c); + + int aux_c + UnmanagedT3D aux_c_view(aux_c, nproma, nlev, nblks_c); //local + + + + switch (cell_type){ + + case 3: + + if(slev == elev){ + + jk = slev; + i_startblk = i_startblk_in[1]; + i_endblk = i_endblk_in[1]; + + for(int jb = i_startblk; jb < i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::parallel_for("aux_c", + Kokkos::RangePolicy<int>(i_startidx, i_endidx), + KOKKOS_LAMBDA(const int jc) { + + aux_c_view(jc, jk, jb) = + psi_c_view(jc, jk, jb) * geofac_n2s_view(jc, 0, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * + geofac_n2s_view(jc, 1, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * + geofac_n2s_view(jc, 2, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * + geofac_n2s_view(jc, 3, jb); + + }); + } + + if (l_limited_area || patch_id > 1){ + + i_startblk = i_startblk_in[2]; + i_endblk = i_endblk_in[2]; + + /*TODO + gradient( Kokkos::subview(aux_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), + Kokkos::subview(nabla2_psi_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), lzacc); + */ + } + + i_startblk = i_startblk_in[3]; + i_endblk = i_endblk_in[3]; + + for(int jb = 
i_startblk; jb < i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::parallel_for("DivGrad", + Kokkos::RangePolicy<int>(i_startidx, i_endidx), + KOKKOS_LAMBDA(const int jc) { + + nabla2_psi_c_view(jc, jk, jb) = + aux_c_view(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + + aux_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * + avg_coeff_view(jc, 1, jb) + + aux_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * + avg_coeff_view(jc, 2, jb) + + aux_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * + avg_coeff_view(jc, 3, jb); + + }); + } + }//if + break; + + default: + std::cout << "Default case.\n"; + +}//switch +}//void + + +template +void nabla2_scalar_avg_lib<double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const double* geofac_n2s, const double* avg_coeff, double* nabla2_psi_c, + int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in, + int nblks_c, int cell_type, int patch_id, + int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); + +template +void nabla2_scalar_avg_lib<float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const float* geofac_n2s, const float* avg_coeff, float* nabla2_psi_c, + int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in, + int nblks_c, int cell_type, int patch_id, + int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); diff --git a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp index 2a6663c..0a67f85 100644 --- a/src/horizontal/mo_lib_laplace.hpp +++ b/src/horizontal/mo_lib_laplace.hpp @@ -22,3 +22,10 @@ void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int 
i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); +template<typename T> +void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const T* geofac_n2s, const T* avg_coeff, T* nabla2_psi_c, + int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in, + int nblks_c, int cell_type, int patch_id, + int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); + -- GitLab From 4f0ea43895255e35edb7483f9187f78089403674 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante4.lvt.dkrz.de> Date: Wed, 26 Feb 2025 10:10:02 +0100 Subject: [PATCH 14/33] Revert "WIP: MR seperation." This reverts commit fed348a36d7907ea9b24e154906ddf8b47ff489a. --- src/horizontal/mo_lib_laplace.cpp | 118 ------------------------------ src/horizontal/mo_lib_laplace.hpp | 7 -- 2 files changed, 125 deletions(-) diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp index 57ad292..203b3b7 100644 --- a/src/horizontal/mo_lib_laplace.cpp +++ b/src/horizontal/mo_lib_laplace.cpp @@ -11,10 +11,8 @@ #include "mo_lib_laplace.hpp" //#include "mo_lib_gradients.hpp" -//#inlcude "mo_fortran_tools.hpp" #include <Kokkos_Core.hpp> #include <iostream> -#include <utility> template<typename T> void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, @@ -104,119 +102,3 @@ void nabla2_scalar_lib<float>(const float* psi_c, const int* cell_neighbor_idx, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); - - -//--------------------------------nabla2_scalar_avg_lib------------------------------------- - - -template<typename T> -void nabla2_scalar_avg_lib(const T* psi_c, const int* 
cell_neighbor_idx, const int* cell_neighbor_blk, - const T* geofac_n2s, const T* avg_coeff, T* nabla2_psi_c, - int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in, - int nblks_c, int cell_type, int patch_id, - int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc){ - - typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - - UnmanagedConstT3D psi_c_view(psi_c, nproma,nlev,nblks_c); - UnmanagedConstInt3D cell_neighbor_idx_view(cell_neighbor_idx, nproma,nblks_c,3); - UnmanagedConstInt3D cell_neighbor_blk_view(cell_neighbor_blk, nproma,nblks_c,3); - UnmanagedConstT3D geofac_n2s_view(geofac_n2s, nproma,cell_type+1,nblks_c); - UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma,nlev,nblks_c); - UnmanagedT3D nabla2_psi_c_view(nabla2_psi_c, nproma,nlev,nblks_c); - - int aux_c - UnmanagedT3D aux_c_view(aux_c, nproma, nlev, nblks_c); //local - - - - switch (cell_type){ - - case 3: - - if(slev == elev){ - - jk = slev; - i_startblk = i_startblk_in[1]; - i_endblk = i_endblk_in[1]; - - for(int jb = i_startblk; jb < i_endblk; ++jb) { - - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); - - Kokkos::parallel_for("aux_c", - Kokkos::RangePolicy<int>(i_startidx, i_endidx), - KOKKOS_LAMBDA(const int jc) { - - aux_c_view(jc, jk, jb) = - psi_c_view(jc, jk, jb) * geofac_n2s_view(jc, 0, jb) + - psi_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * - geofac_n2s_view(jc, 1, jb) + - psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * - geofac_n2s_view(jc, 2, jb) + - psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * - geofac_n2s_view(jc, 3, 
jb); - - }); - } - - if (l_limited_area || patch_id > 1){ - - i_startblk = i_startblk_in[2]; - i_endblk = i_endblk_in[2]; - - /*TODO - gradient( Kokkos::subview(aux_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), - Kokkos::subview(nabla2_psi_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), lzacc); - */ - } - - i_startblk = i_startblk_in[3]; - i_endblk = i_endblk_in[3]; - - for(int jb = i_startblk; jb < i_endblk; ++jb) { - - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); - - Kokkos::parallel_for("DivGrad", - Kokkos::RangePolicy<int>(i_startidx, i_endidx), - KOKKOS_LAMBDA(const int jc) { - - nabla2_psi_c_view(jc, jk, jb) = - aux_c_view(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + - aux_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * - avg_coeff_view(jc, 1, jb) + - aux_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * - avg_coeff_view(jc, 2, jb) + - aux_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * - avg_coeff_view(jc, 3, jb); - - }); - } - }//if - break; - - default: - std::cout << "Default case.\n"; - -}//switch -}//void - - -template -void nabla2_scalar_avg_lib<double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const double* geofac_n2s, const double* avg_coeff, double* nabla2_psi_c, - int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in, - int nblks_c, int cell_type, int patch_id, - int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); - -template -void nabla2_scalar_avg_lib<float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const float* geofac_n2s, const float* avg_coeff, float* nabla2_psi_c, - int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in, - int nblks_c, int cell_type, int patch_id, - int nlev, int 
slev, int elev, int nproma, bool l_limited_area, bool lacc); diff --git a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp index 0a67f85..2a6663c 100644 --- a/src/horizontal/mo_lib_laplace.hpp +++ b/src/horizontal/mo_lib_laplace.hpp @@ -22,10 +22,3 @@ void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); -template<typename T> -void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const T* geofac_n2s, const T* avg_coeff, T* nabla2_psi_c, - int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in, - int nblks_c, int cell_type, int patch_id, - int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); - -- GitLab From 732769bcec238a625ee72b573854559442e12845 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante4.lvt.dkrz.de> Date: Wed, 26 Feb 2025 10:10:15 +0100 Subject: [PATCH 15/33] Revert "Fixed index for nabla2_scalar_lib" This reverts commit 4122a81a9a283e42a5c8df8476e61820800bd0b0. 
--- src/horizontal/mo_lib_laplace.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp index 203b3b7..48c4479 100644 --- a/src/horizontal/mo_lib_laplace.cpp +++ b/src/horizontal/mo_lib_laplace.cpp @@ -56,13 +56,13 @@ void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* KOKKOS_LAMBDA(const int jk, const int jc){ nabla2_psi_c_view(jc, jk, jb) = - psi_c_view(jc, jk, jb)*geofac_n2s_view(jc, 0, jb) + - psi_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * - geofac_n2s_view(jc, 1, jb) + + psi_c_view(jc, jk, jb)*geofac_n2s_view(jc, 1, jb) + psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * geofac_n2s_view(jc, 2, jb) + psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * - geofac_n2s_view(jc, 3, jb); + geofac_n2s_view(jc, 3, jb) + + psi_c_view(cell_neighbor_idx_view(jc, jb, 3), jk, cell_neighbor_blk_view(jc, jb, 3)) * + geofac_n2s_view(jc, 4, jb); }); } break; -- GitLab From c115d9d0d3d52c1a49915544760b8d32d92293ce Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante4.lvt.dkrz.de> Date: Wed, 26 Feb 2025 10:10:24 +0100 Subject: [PATCH 16/33] Revert "ported mo_lib_laplace" This reverts commit 768b5176d0179e3cde4a76e894cc26a1d9513151. 
--- src/horizontal/CMakeLists.txt | 4 +- src/horizontal/mo_lib_laplace.cpp | 104 --------------------- src/horizontal/mo_lib_laplace.hpp | 24 ----- src/horizontal/mo_lib_laplace_bindings.cpp | 50 ---------- src/horizontal/mo_lib_laplace_bindings.h | 32 ------- 5 files changed, 1 insertion(+), 213 deletions(-) delete mode 100644 src/horizontal/mo_lib_laplace.cpp delete mode 100644 src/horizontal/mo_lib_laplace.hpp delete mode 100644 src/horizontal/mo_lib_laplace_bindings.cpp delete mode 100644 src/horizontal/mo_lib_laplace_bindings.h diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt index 44f9e44..078a14d 100644 --- a/src/horizontal/CMakeLists.txt +++ b/src/horizontal/CMakeLists.txt @@ -13,9 +13,7 @@ add_library( iconmath-horizontal mo_lib_divrot.F90 mo_lib_laplace.F90 - mo_lib_gradients.F90 - mo_lib_laplace_bindings.cpp - mo_lib_laplace.cpp) + mo_lib_gradients.F90) add_library(${PROJECT_NAME}::horizontal ALIAS iconmath-horizontal) diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp deleted file mode 100644 index 48c4479..0000000 --- a/src/horizontal/mo_lib_laplace.cpp +++ /dev/null @@ -1,104 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -#include "mo_lib_laplace.hpp" -//#include "mo_lib_gradients.hpp" -#include <Kokkos_Core.hpp> -#include <iostream> - -template<typename T> -void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const int* edge_cell_idx, const int* edge_cell_blk, const T* inv_dual_edge_length, - const int* cell_edge_idx, const int* cell_edge_blk, - const T* geofac_n2s, const T* geofac_div, T* 
nabla2_psi_c, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, - int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){ - - typedef Kokkos::View<const T**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT2D; - typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - - - UnmanagedConstT3D psi_c_view(psi_c, nproma,nlev,nblks_c); - UnmanagedConstInt3D cell_neighbor_idx_view(cell_neighbor_idx, nproma,nblks_c,3); - UnmanagedConstInt3D cell_neighbor_blk_view(cell_neighbor_blk, nproma,nblks_c,3); - UnmanagedConstInt3D edge_cell_idx_view(edge_cell_idx, nproma,nblks_e,2); - UnmanagedConstInt3D edge_cell_blk_view(edge_cell_blk, nproma,nblks_e,2); - UnmanagedConstT2D inv_dual_edge_length_view(inv_dual_edge_length, nproma,nblks_e); - UnmanagedConstInt3D cell_edge_idx_view(cell_edge_idx, nproma,nblks_c,3); - UnmanagedConstInt3D cell_edge_blk_view(cell_edge_blk, nproma,nblks_c,3); - UnmanagedConstT3D geofac_n2s_view(geofac_n2s, nproma,cell_type+1,nblks_c); - UnmanagedConstT3D geofac_div_view(geofac_div, nproma,cell_type,nblks_c); - UnmanagedT3D nabla2_psi_c_view(nabla2_psi_c, nproma,nlev,nblks_c); - - - switch (cell_type){ - - case 3: - for (int jb = i_startblk; jb < i_endblk; ++jb) { - - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); - - - Kokkos::parallel_for("rbf_interpol_c2grad", - Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}), - KOKKOS_LAMBDA(const int jk, const int jc){ - - nabla2_psi_c_view(jc, jk, jb) = - psi_c_view(jc, jk, jb)*geofac_n2s_view(jc, 1, jb) + - 
psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * - geofac_n2s_view(jc, 2, jb) + - psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * - geofac_n2s_view(jc, 3, jb) + - psi_c_view(cell_neighbor_idx_view(jc, jb, 3), jk, cell_neighbor_blk_view(jc, jb, 3)) * - geofac_n2s_view(jc, 4, jb); - }); - } - break; - - - case 6: -/* TODO - grad_fd_norm_lib(psi_c, edge_cell_idx, edge_cell_blk, - inv_dual_edge_length, z_grad_fd_norm_e,& - i_startblk_e, i_endblk_e, i_startidx_e, i_endidx_e, - slev, elev, nproma); - - div_lib(z_grad_fd_norm_e, cell_edge_idx, cell_edge_blk, - geofac_div, nabla2_psi_c, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, slev, elev, nproma); -*/ - default: - std::cout << "Unknown value for cell_type\n"; - - }//switch -}//void - -template -void nabla2_scalar_lib<double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length, - const int* cell_edge_idx, const int* cell_edge_blk, - const double* geofac_n2s, const double* geofac_div, double* nabla2_psi_c, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, - int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); - -template -void nabla2_scalar_lib<float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length, - const int* cell_edge_idx, const int* cell_edge_blk, - const float* geofac_n2s, const float* geofac_div, float* nabla2_psi_c, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, - int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); diff --git 
a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp deleted file mode 100644 index 2a6663c..0000000 --- a/src/horizontal/mo_lib_laplace.hpp +++ /dev/null @@ -1,24 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- -#pragma once - -#include "mo_lib_loopindices.hpp" -#include <Kokkos_Core.hpp> -#include <vector> - -template<typename T> -void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const int* dge_cell_idx, const int* edge_cell_blk, const T* inv_dual_edge_length, - const int* cell_edge_idx, const int* cell_edge_blk, - const T* geofac_n2s, const T* geofac_div, const T* nabla2_psi_c, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, - int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); diff --git a/src/horizontal/mo_lib_laplace_bindings.cpp b/src/horizontal/mo_lib_laplace_bindings.cpp deleted file mode 100644 index 43a19b3..0000000 --- a/src/horizontal/mo_lib_laplace_bindings.cpp +++ /dev/null @@ -1,50 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - - -#include "mo_lib_laplace_bindings.h" -#include "mo_lib_laplace.hpp" - - -void nabla2_scalar_lib_dp(const double* psi_c, const int* 
cell_neighbor_idx, const int* cell_neighbor_blk, - const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length, - const int* cell_edge_idx, const int* cell_edge_blk, - const double* geofac_n2s, const double* geofac_div, const double* nabla2_psi_c, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, - int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){ - - nabla2_scalar_lib<double>(psi_c, cell_neighbor_idx, cell_neighbor_blk, - edge_cell_idx, edge_cell_blk, inv_dual_edge_length, - cell_edge_idx, cell_edge_blk, - geofac_n2s, geofac_div, nabla2_psi_c, - i_startblk, i_endblk, i_startidx_in, i_endidx_in, - i_startblk_e, i_endblk_e, i_startidx_e, i_endidx_e, - nlev, slev, elev, nproma, nblks_e, nblks_c, cell_type, lacc); -} - - -void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length, - const int* cell_edge_idx, const int* cell_edge_blk, - const float* geofac_n2s, const float* geofac_div, const float* nabla2_psi_c, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, - int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){ - - nabla2_scalar_lib<float>(psi_c, cell_neighbor_idx, cell_neighbor_blk, - edge_cell_idx, edge_cell_blk, inv_dual_edge_length, - cell_edge_idx, cell_edge_blk, - geofac_n2s, geofac_div, nabla2_psi_c, - i_startblk, i_endblk, i_startidx_in, i_endidx_in, - i_startblk_e, i_endblk_e, i_startidx_e, i_endidx_e, - nlev, slev, elev, nproma, nblks_e, nblks_c, cell_type, lacc); -} diff --git a/src/horizontal/mo_lib_laplace_bindings.h b/src/horizontal/mo_lib_laplace_bindings.h deleted file mode 100644 index 96bdc64..0000000 --- 
a/src/horizontal/mo_lib_laplace_bindings.h +++ /dev/null @@ -1,32 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- -#pragma once - - - -extern "C"{ - -void nabla2_scalar_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length, - const int* cell_edge_idx, const int* cell_edge_blk, - const double* geofac_n2s, const double* geofac_div, const double* nabla2_psi_c, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, - int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); - -void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length, - const int* cell_edge_idx, const int* cell_edge_blk, - const float* geofac_n2s, const float* geofac_div, const float* nabla2_psi_c, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, - int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc); -} -- GitLab From 6a1c7103cef418f5908d02507e0d40b30f1a923b Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Wed, 26 Feb 2025 11:15:47 +0100 Subject: [PATCH 17/33] modified few things in the existing test for intp_rbf --- test/c/test_intp_rbf.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git 
a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp index 0aa4f9b..4dc2d92 100644 --- a/test/c/test_intp_rbf.cpp +++ b/test/c/test_intp_rbf.cpp @@ -39,7 +39,7 @@ typedef ::testing::Types< MixedPrecision<double, double>, // Define a typed test fixture. template <typename TypePair> -class RbfVecInterpolVertexMixedTestFixture : public ::testing::Test { +class RbfVecInterpolMixedTestFixture : public ::testing::Test { public: using InType = typename TypePair::in_type; using OutType = typename TypePair::out_type; @@ -69,7 +69,7 @@ public: std::vector<OutType> p_u_out; // Dimensions: (nproma, nlev, nblks_v) std::vector<OutType> p_v_out; // Dimensions: (nproma, nlev, nblks_v) - RbfVecInterpolVertexMixedTestFixture() { + RbfVecInterpolMixedTestFixture() { // Allocate and initialize inputs. p_e_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1)); rbf_vec_idx_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 1); @@ -82,9 +82,9 @@ public: } }; -TYPED_TEST_SUITE(RbfVecInterpolVertexMixedTestFixture, MixedTypes); +TYPED_TEST_SUITE(RbfVecInterpolMixedTestFixture, MixedTypes); -TYPED_TEST(RbfVecInterpolVertexMixedTestFixture, BasicTest) { +TYPED_TEST(RbfVecInterpolMixedTestFixture, BasicTest) { using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; @@ -106,8 +106,8 @@ TYPED_TEST(RbfVecInterpolVertexMixedTestFixture, BasicTest) { this->lacc, this->acc_async, this->nlev, - RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_e, - RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_v); + this->nblks_e, + this->nblks_v); // Check the outputs only for blocks in the range [i_startblk, i_endblk]. 
for (int block = this->i_startblk; block <= this->i_endblk; ++block) { -- GitLab From 8dadd736b6f11f68a9f86f89a5e77986358008b2 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Wed, 26 Feb 2025 11:16:28 +0100 Subject: [PATCH 18/33] fixed few bugs in one of the routine of intp_rbf --- src/interpolation/mo_lib_intp_rbf.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp index a7d9cbd..e87a3e5 100644 --- a/src/interpolation/mo_lib_intp_rbf.cpp +++ b/src/interpolation/mo_lib_intp_rbf.cpp @@ -32,13 +32,13 @@ void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, cons UnmanagedConstInt3D rbf_c2grad_blk_view(rbf_c2grad_blk, rbf_c2grad_dim, nproma, nblks_c); UnmanagedConstT4D rbf_c2grad_coeff_view(rbf_c2grad_coeff, rbf_c2grad_dim, 2, nproma, nblks_c); - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::parallel_for("rbf_interpol_c2grad", - Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}), + Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev+1, i_endidx+1}), KOKKOS_LAMBDA(const int jk, const int jc){ grad_x_view(jc, jk, jb) = -- GitLab From 06cf4132c0920ba9615d9b9121a5e3cb7f2b048d Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Wed, 26 Feb 2025 11:19:12 +0100 Subject: [PATCH 19/33] added a unit-test for rbf_interpol_c2grad_lib --- test/c/test_intp_rbf.cpp | 84 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp index 4dc2d92..a6aa11a 100644 --- a/test/c/test_intp_rbf.cpp +++ b/test/c/test_intp_rbf.cpp @@ -13,6 +13,7 @@ #include <Kokkos_Core.hpp> #include <vector> #include 
"mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" +#include "mo_lib_intp_rbf.hpp" // Free-function helpers for 3D and 4D array sizes (assumed column-major) template<typename T> @@ -37,7 +38,88 @@ typedef ::testing::Types< MixedPrecision<double, double>, MixedPrecision<double, float>, MixedPrecision<float, float> > MixedTypes; -// Define a typed test fixture. +// Define a typed test fixture for the functions which have the same input and output types +template <typename T> +class RbfInterpolTypedTestFixture : public ::testing::Test { +public: + + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 4; // vertical levels + static constexpr int nblks_c = 2; // number of blocks for the cell arrays + static constexpr int rbf_c2grad_dim = 10; // fixed dimension for the RBF c2 gradient + + // Parameters for the index ranges. + int i_startblk = 0; + int i_endblk = 1; // Note: loop over jb uses jb < i_endblk, so blocks 0 and 1 + int i_startidx_in = 0; + int i_endidx_in = nproma-1; // [0, nproma) i.e. 0 .. nproma-1 + int slev = 0; + int elev = nlev-1; + bool lacc = false; // Not using any ACC-specific behavior. + + // Data arrays. 
+ std::vector<T> p_cell_in; // size: nproma * nlev * nblks_c + std::vector<int> rbf_c2grad_idx; // size: rbf_c2grad_dim * nproma * nblks_c + std::vector<int> rbf_c2grad_blk; // size: rbf_c2grad_dim * nproma * nblks_c + std::vector<T> rbf_c2grad_coeff; // size: rbf_c2grad_dim * 2 * nproma * nblks_c + std::vector<T> grad_x; // size: nproma * nlev * nblks_c + std::vector<T> grad_y; // size: nproma * nlev * nblks_c + + RbfInterpolTypedTestFixture() { + size_t size3d = static_cast<size_t>(nproma) * nlev * nblks_c; + size_t size3d_idx = static_cast<size_t>(rbf_c2grad_dim) * nproma * nblks_c; + size_t size4d = static_cast<size_t>(rbf_c2grad_dim) * 2 * nproma * nblks_c; + p_cell_in.resize(size3d, static_cast<T>(1)); + rbf_c2grad_idx.resize(size3d_idx, 1); + rbf_c2grad_blk.resize(size3d_idx, 0); // Set block indices to 0 for testing. + rbf_c2grad_coeff.resize(size4d, static_cast<T>(1)); + grad_x.resize(size3d, static_cast<T>(0)); + grad_y.resize(size3d, static_cast<T>(0)); + } +}; + +typedef ::testing::Types<float, double> MyTypes; + +TYPED_TEST_SUITE(RbfInterpolTypedTestFixture, MyTypes); + +TYPED_TEST(RbfInterpolTypedTestFixture, BasicTest) { + + rbf_interpol_c2grad_lib<TypeParam>( + this->p_cell_in.data(), + this->rbf_c2grad_idx.data(), + this->rbf_c2grad_blk.data(), + this->rbf_c2grad_coeff.data(), + this->grad_x.data(), + this->grad_y.data(), + this->i_startblk, + this->i_endblk, + this->i_startidx_in, + this->i_endidx_in, + this->slev, + this->elev, + this->nproma, + this->rbf_c2grad_dim, + this->nlev, + this->nblks_c, + this->lacc); + + // For each block from i_startblk to i_endblk-1, and for each (i, level) + // the kernel sums rbf_c2grad_dim contributions, each equal to 1. + // Therefore, we expect grad_x and grad_y to equal rbf_c2grad_dim (i.e., 10). 
+ for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + for (int jk = 0; jk < this->nlev; ++jk) { + for (int i = 0; i < this->nproma; ++i) { + size_t idx = i + static_cast<size_t>(jk) * this->nproma + static_cast<size_t>(jb) * this->nproma * this->nlev; + EXPECT_NEAR(this->grad_x[idx], static_cast<TypeParam>(this->rbf_c2grad_dim), static_cast<TypeParam>(1e-5)) + << "grad_x failure at block " << jb << ", level " << jk << ", index " << i; + EXPECT_NEAR(this->grad_y[idx], static_cast<TypeParam>(this->rbf_c2grad_dim), static_cast<TypeParam>(1e-5)) + << "grad_y failure at block " << jb << ", level " << jk << ", index " << i; + } + } + } +} + +// Define a typed test fixture for the functions which have different input and output types template <typename TypePair> class RbfVecInterpolMixedTestFixture : public ::testing::Test { public: -- GitLab From 6db33cd540dcab1d944cbf25fcf0b117ad8b8c52 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante4.lvt.dkrz.de> Date: Thu, 27 Feb 2025 11:15:29 +0100 Subject: [PATCH 20/33] Rbf clean up --- src/interpolation/mo_lib_intp_rbf.cpp | 58 +++++++-------- src/interpolation/mo_lib_intp_rbf.hpp | 21 +++--- .../mo_lib_intp_rbf_bindings.cpp | 47 ++++++------ src/interpolation/mo_lib_intp_rbf_bindings.h | 72 ++++++++++--------- 4 files changed, 103 insertions(+), 95 deletions(-) diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp index e87a3e5..b22e737 100644 --- a/src/interpolation/mo_lib_intp_rbf.cpp +++ b/src/interpolation/mo_lib_intp_rbf.cpp @@ -91,24 +91,25 @@ void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, cons template void rbf_interpol_c2grad_lib<double>(const double* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, - const double* rbf_c2grad_coeff, double* grad_x, double* grad_y, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); + 
const double* rbf_c2grad_coeff, double* grad_x, double* grad_y, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); template void rbf_interpol_c2grad_lib<float>(const float* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, - const float* rbf_c2grad_coeff, float* grad_x, float* grad_y, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); + const float* rbf_c2grad_coeff, float* grad_x, float* grad_y, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); //------------------------------------------rbf_vec_interpol_cell_lib--------------------------------------------- template <typename T> void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, - const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async){ + const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, i + int rbf_vec_dim_c, bool lacc, bool acc_async){ typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; @@ -178,23 +179,25 @@ void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const template void rbf_vec_interpol_cell_lib<double>(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, - const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out, - int i_startblk, int 
i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async); + const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, + int rbf_vec_dim_c, bool lacc, bool acc_async); template void rbf_vec_interpol_cell_lib<float>(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, - const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async); + const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, + int rbf_vec_dim_c, bool lacc, bool acc_async); //------------------------------------------rbf_vec_interpol_edge_lib--------------------------------------------- template <typename T> void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, - const T* rbf_vec_coeff_e, T* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_async){ + const T* rbf_vec_coeff_e, T* p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async){ typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; @@ -228,16 +231,15 @@ void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const template 
void rbf_vec_interpol_edge_lib<double>(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, - const double* rbf_vec_coeff_e, double* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_async); + const double* rbf_vec_coeff_e, double* p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async); template void rbf_vec_interpol_edge_lib<float>(const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, - const float* rbf_vec_coeff_e, float* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_async); - + const float* rbf_vec_coeff_e, float* p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async); diff --git a/src/interpolation/mo_lib_intp_rbf.hpp b/src/interpolation/mo_lib_intp_rbf.hpp index e52989d..1e95737 100644 --- a/src/interpolation/mo_lib_intp_rbf.hpp +++ b/src/interpolation/mo_lib_intp_rbf.hpp @@ -16,20 +16,21 @@ template <typename T> void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, - const T* rbf_c2grad_coeff, T* grad_x, T* grad_y, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); + const T* rbf_c2grad_coeff, T* grad_x, T* grad_y, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); template <typename T> void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, 
const int* rbf_vec_blk_c, - const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async); + const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, + int rbf_vec_dim_c, bool lacc, bool acc_async); template <typename T> void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, - const T* rbf_vec_coeff_e, T* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_asynci); + const T* rbf_vec_coeff_e, T* p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async); diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.cpp b/src/interpolation/mo_lib_intp_rbf_bindings.cpp index 5c7943b..2932f24 100644 --- a/src/interpolation/mo_lib_intp_rbf_bindings.cpp +++ b/src/interpolation/mo_lib_intp_rbf_bindings.cpp @@ -15,10 +15,9 @@ void rbf_interpol_c2grad_lib_sp(const float* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, - const float* rbf_c2grad_coeff, float* grad_x, float* grad_y, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){ - + const float* rbf_c2grad_coeff, float* grad_x, float* grad_y, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){ rbf_interpol_c2grad_lib<float>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, rbf_c2grad_coeff, @@ -27,11 +26,10 @@ void 
rbf_interpol_c2grad_lib_sp(const float* p_cell_in, const int*rbf_c2grad_idx nproma, rbf_c2grad_dim, nlev, nblk_c, lacc); } -void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, - const double* rbf_c2grad_coeff, double* grad_x, double* grad_y, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){ - +void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, + const double* rbf_c2grad_coeff, double* grad_x, double* grad_y, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){ rbf_interpol_c2grad_lib<double>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, rbf_c2grad_coeff, @@ -41,9 +39,10 @@ void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int*rbf_c2grad_id } void rbf_vec_interpol_cell_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, - const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, int rbf_vec_dim_c, bool lacc, bool acc_async){ + const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, + int rbf_vec_dim_c, bool lacc, bool acc_async){ rbf_vec_interpol_cell_lib<float>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c, p_u_out, p_v_out, @@ -52,9 +51,10 @@ void rbf_vec_interpol_cell_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_c } void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, - const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out, - int i_startblk, int 
i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, int rbf_vec_dim_c, bool lacc, bool acc_async){ + const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, + int rbf_vec_dim_c, bool lacc, bool acc_async){ rbf_vec_interpol_cell_lib<double>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c, p_u_out, p_v_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in, @@ -62,11 +62,10 @@ void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_ } void rbf_vec_interpol_edge_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, - const double* rbf_vec_coeff_e, double* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_async){ - + const double* rbf_vec_coeff_e, double* p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async){ rbf_vec_interpol_edge_lib<double>(p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e, rbf_vec_coeff_e, p_vt_out, @@ -75,10 +74,10 @@ void rbf_vec_interpol_edge_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_ } void rbf_vec_interpol_edge_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, - const float* rbf_vec_coeff_e, float* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_async){ + const float* rbf_vec_coeff_e, float* p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async){ 
rbf_vec_interpol_edge_lib<float>(p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e, rbf_vec_coeff_e, p_vt_out, diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.h b/src/interpolation/mo_lib_intp_rbf_bindings.h index 9620614..9473aef 100644 --- a/src/interpolation/mo_lib_intp_rbf_bindings.h +++ b/src/interpolation/mo_lib_intp_rbf_bindings.h @@ -12,37 +12,43 @@ extern "C" { -void rbf_interpol_c2grad_lib_sp(const float* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, - const float* rbf_c2grad_coeff, float* grad_x, const float* grad_y, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); - -void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, - const double* rbf_c2grad_coeff, double* grad_x, const double* grad_y, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); - -void rbf_vec_interpol_cell_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, - const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, int rbf_vec_dim_c, bool lacc, bool acc_async); - -void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, - const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, int rbf_vec_dim_c, bool lacc, bool acc_async); - -void rbf_vec_interpol_edge_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, - const double* rbf_vec_coeff_e, double* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - 
int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_async); - -void rbf_vec_interpol_edge_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, - const float* rbf_vec_coeff_e, float* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_async); - - +void rbf_interpol_c2grad_lib_sp( + const float* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, + const float* rbf_c2grad_coeff, float* grad_x, const float* grad_y, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); + +void rbf_interpol_c2grad_lib_dp( + const double* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, + const double* rbf_c2grad_coeff, double* grad_x, const double* grad_y, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); + +void rbf_vec_interpol_cell_lib_sp( + const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, + const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, + int rbf_vec_dim_c, bool lacc, bool acc_async); + +void rbf_vec_interpol_cell_lib_dp( + const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, + const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, + int rbf_vec_dim_c, bool lacc, bool acc_async); + +void rbf_vec_interpol_edge_lib_dp( + const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, + const double* 
rbf_vec_coeff_e, double* p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async); + +void rbf_vec_interpol_edge_lib_sp( + const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, + const float* rbf_vec_coeff_e, float* p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async); } -- GitLab From 1be0c3ab1cec1bcbcf3ab6b446e16ccd4a58c2a5 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante4.lvt.dkrz.de> Date: Thu, 27 Feb 2025 11:22:56 +0100 Subject: [PATCH 21/33] Fix --- src/interpolation/mo_lib_intp_rbf.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp index b22e737..1cce6d8 100644 --- a/src/interpolation/mo_lib_intp_rbf.cpp +++ b/src/interpolation/mo_lib_intp_rbf.cpp @@ -108,7 +108,7 @@ template <typename T> void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, i + int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async){ typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; -- GitLab From 0f5a6e7543abd5e54edf9b043b95bf3d4bca51c8 Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante0.lvt.dkrz.de> Date: Tue, 4 Mar 2025 16:46:31 +0100 Subject: [PATCH 22/33] Added test for cell_lib --- src/interpolation/mo_lib_intp_rbf.cpp | 42 ++++++------ src/interpolation/mo_lib_intp_rbf.hpp | 4 +- .../mo_lib_intp_rbf_bindings.cpp | 8 +-- src/interpolation/mo_lib_intp_rbf_bindings.h | 4 
+- test/c/test_intp_rbf.cpp | 64 ++++++++++++++++++- 5 files changed, 92 insertions(+), 30 deletions(-) diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp index 1cce6d8..bdbfbe5 100644 --- a/src/interpolation/mo_lib_intp_rbf.cpp +++ b/src/interpolation/mo_lib_intp_rbf.cpp @@ -93,13 +93,13 @@ template void rbf_interpol_c2grad_lib<double>(const double* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, const double* rbf_c2grad_coeff, double* grad_x, double* grad_y, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc); template void rbf_interpol_c2grad_lib<float>(const float* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, const float* rbf_c2grad_coeff, float* grad_x, float* grad_y, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc); //------------------------------------------rbf_vec_interpol_cell_lib--------------------------------------------- @@ -108,7 +108,7 @@ template <typename T> void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, + int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async){ typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; @@ -117,20 +117,20 @@ void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const typedef Kokkos::View<const T****, 
Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e); - UnmanagedConstInt3D rbf_vec_idx_c_view(rbf_vec_idx_c, rbf_vec_dim_c, nproma, nblk_c); - UnmanagedConstInt3D rbf_vec_blk_c_view(rbf_vec_blk_c, rbf_vec_dim_c, nproma, nblk_c); - UnmanagedConstT4D rbf_vec_coeff_c_view(rbf_vec_coeff_c, nproma, nblk_c); //TODO - UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblk_c); - UnmanagedT3D p_v_out_view(p_u_out, nproma, nlev, nblk_c); + UnmanagedConstInt3D rbf_vec_idx_c_view(rbf_vec_idx_c, rbf_vec_dim_c, nproma, nblks_c); + UnmanagedConstInt3D rbf_vec_blk_c_view(rbf_vec_blk_c, rbf_vec_dim_c, nproma, nblks_c); + UnmanagedConstT4D rbf_vec_coeff_c_view(rbf_vec_coeff_c, nproma, nblks_c); //TODO + UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblks_c); + UnmanagedT3D p_v_out_view(p_u_out, nproma, nlev, nblks_c); - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::parallel_for("rbf_vec_interpol_cell_lib", - Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}), + Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev+1, i_endidx+1}), KOKKOS_LAMBDA(const int jk, const int jc){ p_u_out_view(jc, jk, jb) = @@ -154,23 +154,23 @@ void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, rbf_vec_blk_c_view(8, jc, jb)); p_v_out_view(jc, jk, jb) = - rbf_vec_coeff_c_view(0, 2, jc, jb)* + rbf_vec_coeff_c_view(0, 1, jc, jb)* p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk, rbf_vec_blk_c_view(0, jc, jb)) + - rbf_vec_coeff_c_view(1, 2, jc, jb)* + rbf_vec_coeff_c_view(1, 1, jc, jb)* p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk, rbf_vec_blk_c_view(1, jc, jb)) + - rbf_vec_coeff_c_view(2, 2, jc, jb)* + rbf_vec_coeff_c_view(2, 1, jc, 
jb)* p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk, rbf_vec_blk_c_view(2, jc, jb)) + - rbf_vec_coeff_c_view(3, 2, jc, jb)* + rbf_vec_coeff_c_view(3, 1, jc, jb)* p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk, rbf_vec_blk_c_view(3, jc, jb)) + - rbf_vec_coeff_c_view(4, 2, jc, jb)* + rbf_vec_coeff_c_view(4, 1, jc, jb)* p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk, rbf_vec_blk_c_view(4, jc, jb)) + - rbf_vec_coeff_c_view(5, 2, jc, jb)* + rbf_vec_coeff_c_view(5, 1, jc, jb)* p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk, rbf_vec_blk_c_view(5, jc, jb)) + - rbf_vec_coeff_c_view(6, 2, jc, jb)* + rbf_vec_coeff_c_view(6, 1, jc, jb)* p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk, rbf_vec_blk_c_view(6, jc, jb)) + - rbf_vec_coeff_c_view(7, 2, jc, jb)* + rbf_vec_coeff_c_view(7, 1, jc, jb)* p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk, rbf_vec_blk_c_view(7, jc, jb)) + - rbf_vec_coeff_c_view(8, 2, jc, jb)* + rbf_vec_coeff_c_view(8, 1, jc, jb)* p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, rbf_vec_blk_c_view(8, jc, jb)); }); Kokkos::fence(); @@ -181,13 +181,13 @@ template void rbf_vec_interpol_cell_lib<double>(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, + int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async); template void rbf_vec_interpol_cell_lib<float>(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, + int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async); 
//------------------------------------------rbf_vec_interpol_edge_lib--------------------------------------------- diff --git a/src/interpolation/mo_lib_intp_rbf.hpp b/src/interpolation/mo_lib_intp_rbf.hpp index 1e95737..5b9d5e1 100644 --- a/src/interpolation/mo_lib_intp_rbf.hpp +++ b/src/interpolation/mo_lib_intp_rbf.hpp @@ -18,14 +18,14 @@ template <typename T> void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, const T* rbf_c2grad_coeff, T* grad_x, T* grad_y, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc); template <typename T> void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, + int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async); template <typename T> diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.cpp b/src/interpolation/mo_lib_intp_rbf_bindings.cpp index 2932f24..b679619 100644 --- a/src/interpolation/mo_lib_intp_rbf_bindings.cpp +++ b/src/interpolation/mo_lib_intp_rbf_bindings.cpp @@ -41,24 +41,24 @@ void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int* rbf_c2grad_i void rbf_vec_interpol_cell_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, + int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async){ 
rbf_vec_interpol_cell_lib<float>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c, p_u_out, p_v_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in, - slev, elev, nproma, nlev, nblk_c, nblk_e, rbf_vec_dim_c, lacc, acc_async); + slev, elev, nproma, nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async); } void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, + int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async){ rbf_vec_interpol_cell_lib<double>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c, p_u_out, p_v_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in, - slev, elev, nproma, nlev, nblk_c, nblk_e, rbf_vec_dim_c, lacc, acc_async); + slev, elev, nproma, nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async); } void rbf_vec_interpol_edge_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.h b/src/interpolation/mo_lib_intp_rbf_bindings.h index 9473aef..cc1dd5c 100644 --- a/src/interpolation/mo_lib_intp_rbf_bindings.h +++ b/src/interpolation/mo_lib_intp_rbf_bindings.h @@ -28,14 +28,14 @@ void rbf_vec_interpol_cell_lib_sp( const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, + int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async); void rbf_vec_interpol_cell_lib_dp( const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, const double* rbf_vec_coeff_c, double* 
p_u_out, double* p_v_out, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, + int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async); void rbf_vec_interpol_edge_lib_dp( diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp index a6aa11a..9414061 100644 --- a/test/c/test_intp_rbf.cpp +++ b/test/c/test_intp_rbf.cpp @@ -12,6 +12,8 @@ #include <gtest/gtest.h> #include <Kokkos_Core.hpp> #include <vector> +#include <algorithm> +#include <numeric> #include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" #include "mo_lib_intp_rbf.hpp" @@ -46,7 +48,9 @@ public: static constexpr int nproma = 3; // inner loop length static constexpr int nlev = 4; // vertical levels static constexpr int nblks_c = 2; // number of blocks for the cell arrays + static constexpr int nblks_e = 2; static constexpr int rbf_c2grad_dim = 10; // fixed dimension for the RBF c2 gradient + static constexpr int rbf_vec_dim_c = 9; // Parameters for the index ranges. int i_startblk = 0; @@ -56,23 +60,43 @@ public: int slev = 0; int elev = nlev-1; bool lacc = false; // Not using any ACC-specific behavior. + bool acc_async = false; // Data arrays. 
std::vector<T> p_cell_in; // size: nproma * nlev * nblks_c std::vector<int> rbf_c2grad_idx; // size: rbf_c2grad_dim * nproma * nblks_c std::vector<int> rbf_c2grad_blk; // size: rbf_c2grad_dim * nproma * nblks_c + std::vector<int> rbf_vec_idx_c; //size: rbf_vec_dim_c * nproma * nblks_c + std::vector<int> rbf_vec_blk_c; // size: rbf_vec_dim_c * nproma * nblks_c std::vector<T> rbf_c2grad_coeff; // size: rbf_c2grad_dim * 2 * nproma * nblks_c std::vector<T> grad_x; // size: nproma * nlev * nblks_c std::vector<T> grad_y; // size: nproma * nlev * nblks_c + std::vector<T> p_vn_in; + std::vector<T> rbf_vec_coeff_c; + std::vector<T> p_u_out; + std::vector<T> p_v_out; RbfInterpolTypedTestFixture() { size_t size3d = static_cast<size_t>(nproma) * nlev * nblks_c; size_t size3d_idx = static_cast<size_t>(rbf_c2grad_dim) * nproma * nblks_c; size_t size4d = static_cast<size_t>(rbf_c2grad_dim) * 2 * nproma * nblks_c; + + size_t size3d_vec_dim = static_cast<size_t>(rbf_vec_dim_c) * nproma * nblks_c; + size_t size_4d_vec_dim = static_cast<size_t>(rbf_vec_dim_c) * 2 * nproma * nblks_c; + p_cell_in.resize(size3d, static_cast<T>(1)); + p_vn_in.resize(size3d, static_cast<T>(1)); + + rbf_vec_idx_c.resize(size3d_vec_dim, 1); + rbf_vec_blk_c.resize(size3d_vec_dim, 0); rbf_c2grad_idx.resize(size3d_idx, 1); rbf_c2grad_blk.resize(size3d_idx, 0); // Set block indices to 0 for testing. 
+ + rbf_vec_coeff_c.resize(size_4d_vec_dim, static_cast<T>(1)); rbf_c2grad_coeff.resize(size4d, static_cast<T>(1)); + + p_u_out.resize(size3d_vec_dim, static_cast<T>(0)); + p_v_out.resize(size3d_vec_dim, static_cast<T>(0)); grad_x.resize(size3d, static_cast<T>(0)); grad_y.resize(size3d, static_cast<T>(0)); } @@ -83,7 +107,7 @@ typedef ::testing::Types<float, double> MyTypes; TYPED_TEST_SUITE(RbfInterpolTypedTestFixture, MyTypes); TYPED_TEST(RbfInterpolTypedTestFixture, BasicTest) { - + using T = TypeParam; rbf_interpol_c2grad_lib<TypeParam>( this->p_cell_in.data(), this->rbf_c2grad_idx.data(), @@ -119,6 +143,44 @@ TYPED_TEST(RbfInterpolTypedTestFixture, BasicTest) { } } + +TYPED_TEST(RbfInterpolTypedTestFixture, cell_lib) { + using T = TypeParam; + + rbf_vec_interpol_cell_lib<T>( + this->p_vn_in.data(), + this->rbf_vec_idx_c.data(), + this->rbf_vec_blk_c.data(), + this->rbf_vec_coeff_c.data(), + this->p_u_out.data(), + this->p_v_out.data(), + this->i_startblk, + this->i_endblk, + this->i_startidx_in, + this->i_endidx_in, + this->slev, + this->elev, + this->nproma, + this->rbf_c2grad_dim, + this->nlev, + this->nblks_c, + this->nblks_e, + this->lacc, + this->acc_async); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + for (int jk = 0; jk < this->nlev; ++jk) { + for (int i = 0; i < this->nproma; ++i) { + size_t idx = i + static_cast<size_t>(jk) * this->nproma + static_cast<size_t>(jb) * this->nproma * this->nlev; + EXPECT_NEAR(this->p_u_out[idx], static_cast<T>(this->rbf_vec_dim_c), static_cast<T>(1e-5)) + << "p_u_out failure at block " << jb << ", level " << jk << ", index " << i; + // EXPECT_NEAR(this->p_v_out[idx], static_cast<T>(this->rbf_vec_dim_c), static_cast<T>(1e-5)) + // << "p_v_out failure at block " << jb << ", level " << jk << ", index " << i; + } + } + } + } + // Define a typed test fixture for the functions which have different input and output types template <typename TypePair> class RbfVecInterpolMixedTestFixture : public 
::testing::Test { -- GitLab From 8175944d3f080ec145d4ce59e2e6fdddb8d6153d Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante0.lvt.dkrz.de> Date: Tue, 4 Mar 2025 16:57:03 +0100 Subject: [PATCH 23/33] Fixed strange typo --- src/horizontal/mo_lib_divrot.F90 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/horizontal/mo_lib_divrot.F90 b/src/horizontal/mo_lib_divrot.F90 index a2f2ad9..c1360f6 100644 --- a/src/horizontal/mo_lib_divrot.F90 +++ b/src/horizontal/mo_lib_divrot.F90 @@ -561,7 +561,7 @@ CONTAINS !! !! !LITERATURE !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh -!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys., +!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys., !! 181, 729-752 !! SUBROUTINE recon_lsq_cell_q_lib(p_cc, lsq_idx_c, lsq_blk_c, & @@ -800,7 +800,7 @@ CONTAINS !! !! !LITERATURE !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh -!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys., +!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys., !! 181, 729-752 !! SUBROUTINE recon_lsq_cell_q_svd_lib(p_cc, lsq_idx_c, lsq_blk_c, & @@ -1008,7 +1008,7 @@ CONTAINS !! !! !LITERATURE !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh -!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys., +!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys., !! 181, 729-752 !! SUBROUTINE recon_lsq_cell_c_lib(p_cc, lsq_idx_c, lsq_blk_c, & @@ -1295,7 +1295,7 @@ CONTAINS !! !! !LITERATURE !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh -!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys., +!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys., !! 181, 729-752 !! 
SUBROUTINE recon_lsq_cell_c_svd_lib(p_cc, lsq_idx_c, lsq_blk_c, & -- GitLab From 3afe4ffe25a1621a662d055f21b04aaf8f9e3fbb Mon Sep 17 00:00:00 2001 From: Ali Sedighi <k202194@levante0.lvt.dkrz.de> Date: Wed, 5 Mar 2025 14:57:12 +0100 Subject: [PATCH 24/33] Added test for edge_lib --- src/interpolation/mo_lib_intp_rbf.cpp | 22 ++++++------ test/c/test_intp_rbf.cpp | 52 +++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 11 deletions(-) diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp index bdbfbe5..76e4784 100644 --- a/src/interpolation/mo_lib_intp_rbf.cpp +++ b/src/interpolation/mo_lib_intp_rbf.cpp @@ -154,23 +154,23 @@ void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, rbf_vec_blk_c_view(8, jc, jb)); p_v_out_view(jc, jk, jb) = - rbf_vec_coeff_c_view(0, 1, jc, jb)* + rbf_vec_coeff_c_view(0, 2, jc, jb)* p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk, rbf_vec_blk_c_view(0, jc, jb)) + - rbf_vec_coeff_c_view(1, 1, jc, jb)* + rbf_vec_coeff_c_view(1, 2, jc, jb)* p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk, rbf_vec_blk_c_view(1, jc, jb)) + - rbf_vec_coeff_c_view(2, 1, jc, jb)* + rbf_vec_coeff_c_view(2, 2, jc, jb)* p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk, rbf_vec_blk_c_view(2, jc, jb)) + - rbf_vec_coeff_c_view(3, 1, jc, jb)* + rbf_vec_coeff_c_view(3, 2, jc, jb)* p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk, rbf_vec_blk_c_view(3, jc, jb)) + - rbf_vec_coeff_c_view(4, 1, jc, jb)* + rbf_vec_coeff_c_view(4, 2, jc, jb)* p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk, rbf_vec_blk_c_view(4, jc, jb)) + - rbf_vec_coeff_c_view(5, 1, jc, jb)* + rbf_vec_coeff_c_view(5, 2, jc, jb)* p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk, rbf_vec_blk_c_view(5, jc, jb)) + - rbf_vec_coeff_c_view(6, 1, jc, jb)* + rbf_vec_coeff_c_view(6, 2, jc, jb)* p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk, rbf_vec_blk_c_view(6, jc, jb)) + - rbf_vec_coeff_c_view(7, 1, jc, 
jb)* + rbf_vec_coeff_c_view(7, 2, jc, jb)* p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk, rbf_vec_blk_c_view(7, jc, jb)) + - rbf_vec_coeff_c_view(8, 1, jc, jb)* + rbf_vec_coeff_c_view(8, 2, jc, jb)* p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, rbf_vec_blk_c_view(8, jc, jb)); }); Kokkos::fence(); @@ -210,13 +210,13 @@ void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const UnmanagedT3D p_vt_out_view(p_vt_out, nproma,nlev,nblks_e); - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::parallel_for("rbf_vec_interpol_edge_lib", - Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}), + Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev+1, i_endidx+1}), KOKKOS_LAMBDA(const int jk, const int je){ p_vt_out_view(je, jk, jb) = diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp index 9414061..426a86b 100644 --- a/test/c/test_intp_rbf.cpp +++ b/test/c/test_intp_rbf.cpp @@ -51,6 +51,7 @@ public: static constexpr int nblks_e = 2; static constexpr int rbf_c2grad_dim = 10; // fixed dimension for the RBF c2 gradient static constexpr int rbf_vec_dim_c = 9; + static constexpr int rbf_vec_dim_e = 4; // Parameters for the index ranges. 
int i_startblk = 0; @@ -76,6 +77,12 @@ public: std::vector<T> p_u_out; std::vector<T> p_v_out; + std::vector<int> rbf_vec_idx_e; + std::vector<int> rbf_vec_blk_e; + std::vector<T> rbf_vec_coeff_e; + std::vector<T> p_vt_out; + + RbfInterpolTypedTestFixture() { size_t size3d = static_cast<size_t>(nproma) * nlev * nblks_c; size_t size3d_idx = static_cast<size_t>(rbf_c2grad_dim) * nproma * nblks_c; @@ -84,6 +91,9 @@ public: size_t size3d_vec_dim = static_cast<size_t>(rbf_vec_dim_c) * nproma * nblks_c; size_t size_4d_vec_dim = static_cast<size_t>(rbf_vec_dim_c) * 2 * nproma * nblks_c; + size_t size3d_edge_lib = static_cast<size_t>(rbf_vec_dim_e) * nproma * nblks_c; + size_t size_4d_edge_lib = static_cast<size_t>(rbf_vec_dim_e) * 2 * nproma * nblks_c; + p_cell_in.resize(size3d, static_cast<T>(1)); p_vn_in.resize(size3d, static_cast<T>(1)); @@ -91,12 +101,17 @@ public: rbf_vec_blk_c.resize(size3d_vec_dim, 0); rbf_c2grad_idx.resize(size3d_idx, 1); rbf_c2grad_blk.resize(size3d_idx, 0); // Set block indices to 0 for testing. 
+ rbf_vec_idx_e.resize(size3d_vec_dim, 1); + rbf_vec_blk_e.resize(size3d_vec_dim, 0); rbf_vec_coeff_c.resize(size_4d_vec_dim, static_cast<T>(1)); rbf_c2grad_coeff.resize(size4d, static_cast<T>(1)); + rbf_vec_coeff_e.resize(size_4d_edge_lib, static_cast<T>(1)); p_u_out.resize(size3d_vec_dim, static_cast<T>(0)); p_v_out.resize(size3d_vec_dim, static_cast<T>(0)); + p_vt_out.resize(size3d_edge_lib, static_cast<T>(0)); + grad_x.resize(size3d, static_cast<T>(0)); grad_y.resize(size3d, static_cast<T>(0)); } @@ -181,6 +196,43 @@ TYPED_TEST(RbfInterpolTypedTestFixture, cell_lib) { } } + +TYPED_TEST(RbfInterpolTypedTestFixture, edge_lib){ + using T = TypeParam; + + + rbf_vec_interpol_edge_lib<T>( + this->p_vn_in.data(), + this->rbf_vec_idx_e.data(), + this->rbf_vec_blk_e.data(), + this->rbf_vec_coeff_e.data(), + this->p_vt_out.data(), + this->i_startblk, + this->i_endblk, + this->i_startidx_in, + this->i_endidx_in, + this->slev, + this->elev, + this->nlev, + this->nproma, + this->rbf_vec_dim_e, + this->nblks_e, + this->lacc, + this->acc_async); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + for (int jk = 0; jk < this->nlev; ++jk) { + for (int i = 0; i < this->nproma; ++i) { + size_t idx = i + static_cast<size_t>(jk) * this->nproma + static_cast<size_t>(jb) * this->nproma * this->nlev; + EXPECT_NEAR(this->p_vt_out[idx], static_cast<T>(this->rbf_vec_dim_e), static_cast<T>(1e-5)) + << "p_vt_out failure at block " << jb << ", level " << jk << ", index " << i; + } + } + } + +} + + // Define a typed test fixture for the functions which have different input and output types template <typename TypePair> class RbfVecInterpolMixedTestFixture : public ::testing::Test { -- GitLab From 9d414e59bb6ed9057b6335f7a5411f7eba352c30 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Thu, 6 Mar 2025 10:57:03 +0100 Subject: [PATCH 25/33] put the instantiations at the end of the file --- src/interpolation/mo_lib_intp_rbf.cpp | 82 +++++++++++++++------------ 
1 file changed, 47 insertions(+), 35 deletions(-) diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp index 76e4784..f1a9fa4 100644 --- a/src/interpolation/mo_lib_intp_rbf.cpp +++ b/src/interpolation/mo_lib_intp_rbf.cpp @@ -89,18 +89,6 @@ void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, cons }//for }//void -template -void rbf_interpol_c2grad_lib<double>(const double* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, - const double* rbf_c2grad_coeff, double* grad_x, double* grad_y, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc); - -template -void rbf_interpol_c2grad_lib<float>(const float* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, - const float* rbf_c2grad_coeff, float* grad_x, float* grad_y, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc); - //------------------------------------------rbf_vec_interpol_cell_lib--------------------------------------------- @@ -177,19 +165,6 @@ void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const }//for }//void -template -void rbf_vec_interpol_cell_lib<double>(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, - const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, - int rbf_vec_dim_c, bool lacc, bool acc_async); -template -void rbf_vec_interpol_cell_lib<float>(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, - const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblks_c, int 
nblks_e, - int rbf_vec_dim_c, bool lacc, bool acc_async); - //------------------------------------------rbf_vec_interpol_edge_lib--------------------------------------------- template <typename T> @@ -230,16 +205,53 @@ void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const } template -void rbf_vec_interpol_edge_lib<double>(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, - const double* rbf_vec_coeff_e, double* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_async); +void rbf_vec_interpol_cell_lib<double>( + const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, + const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, + int rbf_vec_dim_c, bool lacc, bool acc_async +); +template +void rbf_vec_interpol_cell_lib<float>( + const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, + const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, + int rbf_vec_dim_c, bool lacc, bool acc_async +); + +template +void rbf_interpol_c2grad_lib<double>( + const double* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, + const double* rbf_c2grad_coeff, double* grad_x, double* grad_y, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc +); + +template +void rbf_interpol_c2grad_lib<float>( + const float* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, + const float* rbf_c2grad_coeff, float* grad_x, float* grad_y, + int i_startblk, int i_endblk, int 
i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc +); + +template +void rbf_vec_interpol_edge_lib<double>( + const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, + const double* rbf_vec_coeff_e, double* p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async +); template -void rbf_vec_interpol_edge_lib<float>(const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, - const float* rbf_vec_coeff_e, float* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_async); +void rbf_vec_interpol_edge_lib<float>( + const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, + const float* rbf_vec_coeff_e, float* p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async +); -- GitLab From a3e149a6fc76fbc261f754c66e18ecb9912073ac Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Thu, 6 Mar 2025 11:48:00 +0100 Subject: [PATCH 26/33] moved intp_rbf routines from the temporary file to the main file fixed a bug --- src/interpolation/CMakeLists.txt | 2 - ...b_intp_rbf-rbf_vec_interpol_vertex_lib.cpp | 197 ------------------ ...b_intp_rbf-rbf_vec_interpol_vertex_lib.hpp | 32 --- ...f-rbf_vec_interpol_vertex_lib_bindings.cpp | 134 ------------ ...rbf-rbf_vec_interpol_vertex_lib_bindings.h | 54 ----- src/interpolation/mo_lib_intp_rbf.cpp | 176 ++++++++++++++++ src/interpolation/mo_lib_intp_rbf.hpp | 20 ++ .../mo_lib_intp_rbf_bindings.cpp | 118 +++++++++++ src/interpolation/mo_lib_intp_rbf_bindings.h | 38 ++++ test/c/test_intp_rbf.cpp | 1 - 10 files changed, 352 insertions(+), 
420 deletions(-) delete mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp delete mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp delete mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp delete mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index d5af767..bf5b0ac 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -16,8 +16,6 @@ add_library( mo_lib_interpolation_vector.F90 mo_lib_interpolation_vector.cpp mo_lib_intp_rbf.F90 - mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp - mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp interpolation_bindings.cpp mo_lib_intp_rbf_bindings.cpp mo_lib_intp_rbf.cpp) diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp deleted file mode 100644 index c9b776e..0000000 --- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp +++ /dev/null @@ -1,197 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -/// Contains the only mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib() -/// -/// Separate to avoid conflicts with Ali working on rest of mo_lib_intp_rbf - -#include <type_traits> -#include <Kokkos_Core.hpp> -#include "mo_lib_loopindices.hpp" -#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" - - -constexpr int rbf_vec_dim_v = 6; - -//------------------------------------------------------------------------- -// -// -//> -/// Performs 
vector RBF reconstruction at triangle vertices. -/// -/// Theory described in Narcowich and Ward (Math Comp. 1994) and -/// Bonaventura and Baudisch (Mox Report n. 75). -/// It takes edge based variables as input and combines them -/// into three dimensional cartesian vectors at each vertex. -/// -/// Two templated variables in order to support mixed precision. -/// Intended that type_traits::is_floating_point(T,S)==TRUE -/// precision(T) >= precision(S) -template <typename T, typename S> -void rbf_vec_interpol_vertex_lib( - const T* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const T* rbf_vec_coeff_v, - S* p_u_out, - S* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - // Dimensions for the arrays. - const int nlev, const int nblks_e, const int nblks_v - ) -{ - /* -#ifdef DIM_ENABLE_GPU - if (lacc){ using MemSpace = Kokkos::CudaSpace; - } else { using MemSpace = Kokkos::HostSpace; } -#else - using MemSpace = Kokkos::HostSpace; -#endif - - */ - - // Wrap raw pointers in unmanaged Kokkos Views. 
- typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<const T****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D; - - - - // input components of velocity or horizontal vorticity vectors at edge midpoints - // dim: (nproma,nlev,nblks_e) - UnmanagedConstT3D p_e_in_view(p_e_in, nproma, nlev, nblks_e); - - // index array defining the stencil of surrounding edges for vector rbf interpolation at each triangle vertex - // (rbf_vec_dim_v,nproma,nblks_v) - UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v); - UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v); - - // coefficients are working precision array containing the coefficients used for vector rbf interpolation - // at each tringle vertex (input is normal component), - // dim: (rbf_vec_dim_v,2,nproma,nblks_v) - UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v); - - // reconstructed x-component (u) of velocity vector, - // dim: (nproma,nlev,nblks_v) - UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v); - // reconstructed y-component (v) of velocity vector, - // dim: (nproma,nlev,nblks_v) - UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v); - - // Local vars - //int jv, jk, jb; // integer over vertices, levels, and blocks, - int jb; // integer over vertices, levels, and blocks, - int i_startidx; // start index - int i_endidx; // end index - - for (jb=i_startblk; jb <= i_endblk; ++jb){ - - get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, - i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); - - 
Kokkos::parallel_for("rbf_vec_interpol_vertex_lib", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jv) { - - // NOTE: Static indexes reduced by 1 from Fortran version - p_u_out_view(jv, jk, jb) = - ptr_coeff_view(0, 0, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + - ptr_coeff_view(1, 0, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + - ptr_coeff_view(2, 0, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + - ptr_coeff_view(3, 0, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + - ptr_coeff_view(4, 0, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + - ptr_coeff_view(5, 0, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); - p_v_out_view(jv, jk, jb) = - ptr_coeff_view(0, 1, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + - ptr_coeff_view(1, 1, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + - ptr_coeff_view(2, 1, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + - ptr_coeff_view(3, 1, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + - ptr_coeff_view(4, 1, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + - ptr_coeff_view(5, 1, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); - } - ); - } -} - -// Explicit instantiation - double precision -template -void rbf_vec_interpol_vertex_lib<double, double>( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - double* p_u_out, - double* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // 
inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); - -// Explicit instantiation - single precision -template -void rbf_vec_interpol_vertex_lib<float, float>( - const float* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const float* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); - -// Explicit instantiation - mixed precision -template -void rbf_vec_interpol_vertex_lib<double, float>( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); - diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp 
b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp deleted file mode 100644 index c0b6f05..0000000 --- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp +++ /dev/null @@ -1,32 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -#pragma once - -template <typename T, typename S> -void rbf_vec_interpol_vertex_lib( - const T* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const T* rbf_vec_coeff_v, - S* p_u_out, - S* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_c - ); \ No newline at end of file diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp deleted file mode 100644 index 06dc467..0000000 --- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp +++ /dev/null @@ -1,134 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See 
LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h" -#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" - -void rbf_vec_interpol_vertex_lib_dp( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - double* p_u_out, - double* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ) -{ - rbf_vec_interpol_vertex_lib<double, double>( - p_e_in, - rbf_vec_idx_v, - rbf_vec_blk_v, - rbf_vec_coeff_v, - p_u_out, - p_v_out, - i_startblk, // start_block needed for get_indices_c_lib - i_endblk, // end_block needed for get_indices_c_lib - i_startidx_in, // start_index needed for get_indices_c_lib - i_endidx_in, // end_index needed for get_indices_c_lib - slev, // vertical start level - elev, // vertical end level - nproma, // inner loop length/vector length - lacc, // if true, use Cuda mem-/exec-spaces - acc_async, // [deprecated] use async acc - nlev, nblks_e, nblks_v - ); -} - - -void rbf_vec_interpol_vertex_lib_sp( - const float* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const float* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index 
needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ) -{ - rbf_vec_interpol_vertex_lib<float, float>( - p_e_in, - rbf_vec_idx_v, - rbf_vec_blk_v, - rbf_vec_coeff_v, - p_u_out, - p_v_out, - i_startblk, // start_block needed for get_indices_c_lib - i_endblk, // end_block needed for get_indices_c_lib - i_startidx_in, // start_index needed for get_indices_c_lib - i_endidx_in, // end_index needed for get_indices_c_lib - slev, // vertical start level - elev, // vertical end level - nproma, // inner loop length/vector length - lacc, // if true, use Cuda mem-/exec-spaces - acc_async, // [deprecated] use async acc - nlev, nblks_e, nblks_v - ); - -} - -void rbf_vec_interpol_vertex_lib_mixprec( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ) -{ - rbf_vec_interpol_vertex_lib<double, float>( - p_e_in, - rbf_vec_idx_v, - rbf_vec_blk_v, - rbf_vec_coeff_v, - p_u_out, - p_v_out, - i_startblk, // start_block needed for get_indices_c_lib - i_endblk, // end_block needed for 
get_indices_c_lib - i_startidx_in, // start_index needed for get_indices_c_lib - i_endidx_in, // end_index needed for get_indices_c_lib - slev, // vertical start level - elev, // vertical end level - nproma, // inner loop length/vector length - lacc, // if true, use Cuda mem-/exec-spaces - acc_async, // [deprecated] use async acc - nlev, nblks_e, nblks_v - ); - -} - diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h deleted file mode 100644 index 4356f88..0000000 --- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h +++ /dev/null @@ -1,54 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -#pragma once - -extern "C" { - -void rbf_vec_interpol_vertex_lib_dp( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - double* p_u_out, - double* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); - -void rbf_vec_interpol_vertex_lib_sp( - const float* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - 
const float* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); - -} \ No newline at end of file diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp index f1a9fa4..361710f 100644 --- a/src/interpolation/mo_lib_intp_rbf.cpp +++ b/src/interpolation/mo_lib_intp_rbf.cpp @@ -12,6 +12,119 @@ #include "mo_lib_intp_rbf.hpp" #include <Kokkos_Core.hpp> +constexpr int rbf_vec_dim_v = 6; + +//------------------------------------------------------------------------- +// +// +//> +/// Performs vector RBF reconstruction at triangle vertices. +/// +/// Theory described in Narcowich and Ward (Math Comp. 1994) and +/// Bonaventura and Baudisch (Mox Report n. 75). +/// It takes edge based variables as input and combines them +/// into three dimensional cartesian vectors at each vertex. +/// +/// Two templated variables in order to support mixed precision. 
+/// Intended that type_traits::is_floating_point(T,S)==TRUE +/// precision(T) >= precision(S) +template <typename T, typename S> +void rbf_vec_interpol_vertex_lib( + const T* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const T* rbf_vec_coeff_v, + S* p_u_out, + S* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + // Dimensions for the arrays. + const int nlev, const int nblks_e, const int nblks_v + ) +{ + /* +#ifdef DIM_ENABLE_GPU + if (lacc){ using MemSpace = Kokkos::CudaSpace; + } else { using MemSpace = Kokkos::HostSpace; } +#else + using MemSpace = Kokkos::HostSpace; +#endif + + */ + + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<const T****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D; + + + + // input components of velocity or horizontal vorticity vectors at edge midpoints + // dim: (nproma,nlev,nblks_e) + UnmanagedConstT3D p_e_in_view(p_e_in, nproma, nlev, nblks_e); + + // index array defining the stencil of surrounding edges for vector rbf interpolation at each triangle vertex + // (rbf_vec_dim_v,nproma,nblks_v) + UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v); + UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v); + + // coefficients are working precision array containing the coefficients used for vector rbf interpolation + // at each tringle vertex (input is normal component), + // dim: (rbf_vec_dim_v,2,nproma,nblks_v) + UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v); + + // reconstructed x-component (u) of velocity vector, + // dim: (nproma,nlev,nblks_v) + UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v); + // reconstructed y-component (v) of velocity vector, + // dim: (nproma,nlev,nblks_v) + UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v); + + // Local vars + //int jv, jk, jb; // integer over vertices, levels, and blocks, + int jb; // integer over vertices, levels, and blocks, + int i_startidx; // start index + int i_endidx; // end index + + for (jb=i_startblk; jb <= i_endblk; ++jb){ + + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, + i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + 
Kokkos::parallel_for("rbf_vec_interpol_vertex_lib", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jv) { + + // NOTE: Static indexes reduced by 1 from Fortran version + p_u_out_view(jv, jk, jb) = + ptr_coeff_view(0, 0, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + + ptr_coeff_view(1, 0, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + + ptr_coeff_view(2, 0, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + + ptr_coeff_view(3, 0, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + + ptr_coeff_view(4, 0, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + + ptr_coeff_view(5, 0, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); + p_v_out_view(jv, jk, jb) = + ptr_coeff_view(0, 1, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + + ptr_coeff_view(1, 1, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + + ptr_coeff_view(2, 1, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + + ptr_coeff_view(3, 1, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + + ptr_coeff_view(4, 1, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + + ptr_coeff_view(5, 1, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); + } + ); + } +} + template <typename T> void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, const T* rbf_c2grad_coeff, T* grad_x, T* grad_y, @@ -204,6 +317,69 @@ void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const } } +// Explicit instantiation - double precision +template +void rbf_vec_interpol_vertex_lib<double, double>( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + double* p_u_out, + double* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // 
end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + +// Explicit instantiation - single precision +template +void rbf_vec_interpol_vertex_lib<float, float>( + const float* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const float* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + +// Explicit instantiation - mixed precision +template +void rbf_vec_interpol_vertex_lib<double, float>( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, 
// if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + template void rbf_vec_interpol_cell_lib<double>( const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, diff --git a/src/interpolation/mo_lib_intp_rbf.hpp b/src/interpolation/mo_lib_intp_rbf.hpp index 5b9d5e1..53547fc 100644 --- a/src/interpolation/mo_lib_intp_rbf.hpp +++ b/src/interpolation/mo_lib_intp_rbf.hpp @@ -14,6 +14,26 @@ #include <Kokkos_Core.hpp> #include <vector> +template <typename T, typename S> +void rbf_vec_interpol_vertex_lib( + const T* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const T* rbf_vec_coeff_v, + S* p_u_out, + S* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_c + ); + template <typename T> void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, const T* rbf_c2grad_coeff, T* grad_x, T* grad_y, diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.cpp b/src/interpolation/mo_lib_intp_rbf_bindings.cpp index b679619..e8461e6 100644 --- a/src/interpolation/mo_lib_intp_rbf_bindings.cpp +++ b/src/interpolation/mo_lib_intp_rbf_bindings.cpp @@ -13,6 +13,124 @@ #include "mo_lib_intp_rbf_bindings.h" #include "mo_lib_intp_rbf.hpp" +void rbf_vec_interpol_vertex_lib_dp( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + 
double* p_u_out, + double* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ) +{ + rbf_vec_interpol_vertex_lib<double, double>( + p_e_in, + rbf_vec_idx_v, + rbf_vec_blk_v, + rbf_vec_coeff_v, + p_u_out, + p_v_out, + i_startblk, // start_block needed for get_indices_c_lib + i_endblk, // end_block needed for get_indices_c_lib + i_startidx_in, // start_index needed for get_indices_c_lib + i_endidx_in, // end_index needed for get_indices_c_lib + slev, // vertical start level + elev, // vertical end level + nproma, // inner loop length/vector length + lacc, // if true, use Cuda mem-/exec-spaces + acc_async, // [deprecated] use async acc + nlev, nblks_e, nblks_v + ); +} + +void rbf_vec_interpol_vertex_lib_sp( + const float* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const float* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ) +{ + 
rbf_vec_interpol_vertex_lib<float, float>( + p_e_in, + rbf_vec_idx_v, + rbf_vec_blk_v, + rbf_vec_coeff_v, + p_u_out, + p_v_out, + i_startblk, // start_block needed for get_indices_c_lib + i_endblk, // end_block needed for get_indices_c_lib + i_startidx_in, // start_index needed for get_indices_c_lib + i_endidx_in, // end_index needed for get_indices_c_lib + slev, // vertical start level + elev, // vertical end level + nproma, // inner loop length/vector length + lacc, // if true, use Cuda mem-/exec-spaces + acc_async, // [deprecated] use async acc + nlev, nblks_e, nblks_v + ); + +} + +void rbf_vec_interpol_vertex_lib_mixprec( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ) +{ + rbf_vec_interpol_vertex_lib<double, float>( + p_e_in, + rbf_vec_idx_v, + rbf_vec_blk_v, + rbf_vec_coeff_v, + p_u_out, + p_v_out, + i_startblk, // start_block needed for get_indices_c_lib + i_endblk, // end_block needed for get_indices_c_lib + i_startidx_in, // start_index needed for get_indices_c_lib + i_endidx_in, // end_index needed for get_indices_c_lib + slev, // vertical start level + elev, // vertical end level + nproma, // inner loop length/vector length + lacc, // if true, use Cuda mem-/exec-spaces + acc_async, // [deprecated] use async acc + nlev, nblks_e, nblks_v + ); + +} void rbf_interpol_c2grad_lib_sp(const float* 
p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, const float* rbf_c2grad_coeff, float* grad_x, float* grad_y, diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.h b/src/interpolation/mo_lib_intp_rbf_bindings.h index cc1dd5c..fb8e87c 100644 --- a/src/interpolation/mo_lib_intp_rbf_bindings.h +++ b/src/interpolation/mo_lib_intp_rbf_bindings.h @@ -12,6 +12,44 @@ extern "C" { +void rbf_vec_interpol_vertex_lib_dp( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + double* p_u_out, + double* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + +void rbf_vec_interpol_vertex_lib_sp( + const float* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const float* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + void rbf_interpol_c2grad_lib_sp( const float* p_cell_in, const int* 
rbf_c2grad_idx, const int* rbf_c2grad_blk, const float* rbf_c2grad_coeff, float* grad_x, const float* grad_y, diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp index 426a86b..924ff02 100644 --- a/test/c/test_intp_rbf.cpp +++ b/test/c/test_intp_rbf.cpp @@ -14,7 +14,6 @@ #include <vector> #include <algorithm> #include <numeric> -#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" #include "mo_lib_intp_rbf.hpp" // Free-function helpers for 3D and 4D array sizes (assumed column-major) -- GitLab From f0f195141b717ccf9c243161cacab52b114cf535 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Thu, 6 Mar 2025 12:15:27 +0100 Subject: [PATCH 27/33] deleted intp_rbf_bindings and merged its contents into interpolation_bindings --- src/interpolation/CMakeLists.txt | 5 +- src/interpolation/interpolation_bindings.cpp | 191 ++++++++++++++++ src/interpolation/interpolation_bindings.h | 78 +++++++ .../mo_lib_intp_rbf_bindings.cpp | 204 ------------------ src/interpolation/mo_lib_intp_rbf_bindings.h | 92 -------- 5 files changed, 271 insertions(+), 299 deletions(-) delete mode 100644 src/interpolation/mo_lib_intp_rbf_bindings.cpp delete mode 100644 src/interpolation/mo_lib_intp_rbf_bindings.h diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index bf5b0ac..96f281c 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -16,9 +16,8 @@ add_library( mo_lib_interpolation_vector.F90 mo_lib_interpolation_vector.cpp mo_lib_intp_rbf.F90 - interpolation_bindings.cpp - mo_lib_intp_rbf_bindings.cpp - mo_lib_intp_rbf.cpp) + mo_lib_intp_rbf.cpp + interpolation_bindings.cpp) add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation) diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp index 628f411..75449d6 100644 --- a/src/interpolation/interpolation_bindings.cpp +++ b/src/interpolation/interpolation_bindings.cpp @@ -12,6 +12,7 
@@ #include "interpolation_bindings.h" #include "mo_lib_interpolation_scalar.hpp" #include "mo_lib_interpolation_vector.hpp" +#include "mo_lib_intp_rbf.hpp" // This is the binding for mo_interpolation_vector::edges2cells_vector_lib // (wp=dp) @@ -326,3 +327,193 @@ void cell_avg_lib_sp(const float *psi_c, const int *cell_neighbor_idx, avg_psi_c, i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, nlev, nblks_c, lacc); } + +void rbf_vec_interpol_vertex_lib_dp( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + double* p_u_out, + double* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ) +{ + rbf_vec_interpol_vertex_lib<double, double>( + p_e_in, + rbf_vec_idx_v, + rbf_vec_blk_v, + rbf_vec_coeff_v, + p_u_out, + p_v_out, + i_startblk, // start_block needed for get_indices_c_lib + i_endblk, // end_block needed for get_indices_c_lib + i_startidx_in, // start_index needed for get_indices_c_lib + i_endidx_in, // end_index needed for get_indices_c_lib + slev, // vertical start level + elev, // vertical end level + nproma, // inner loop length/vector length + lacc, // if true, use Cuda mem-/exec-spaces + acc_async, // [deprecated] use async acc + nlev, nblks_e, nblks_v + ); +} + +void rbf_vec_interpol_vertex_lib_sp( + const float* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const float* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const 
int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ) +{ + rbf_vec_interpol_vertex_lib<float, float>( + p_e_in, + rbf_vec_idx_v, + rbf_vec_blk_v, + rbf_vec_coeff_v, + p_u_out, + p_v_out, + i_startblk, // start_block needed for get_indices_c_lib + i_endblk, // end_block needed for get_indices_c_lib + i_startidx_in, // start_index needed for get_indices_c_lib + i_endidx_in, // end_index needed for get_indices_c_lib + slev, // vertical start level + elev, // vertical end level + nproma, // inner loop length/vector length + lacc, // if true, use Cuda mem-/exec-spaces + acc_async, // [deprecated] use async acc + nlev, nblks_e, nblks_v + ); + +} + +void rbf_vec_interpol_vertex_lib_mixprec( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ) +{ + rbf_vec_interpol_vertex_lib<double, float>( + p_e_in, + 
rbf_vec_idx_v, + rbf_vec_blk_v, + rbf_vec_coeff_v, + p_u_out, + p_v_out, + i_startblk, // start_block needed for get_indices_c_lib + i_endblk, // end_block needed for get_indices_c_lib + i_startidx_in, // start_index needed for get_indices_c_lib + i_endidx_in, // end_index needed for get_indices_c_lib + slev, // vertical start level + elev, // vertical end level + nproma, // inner loop length/vector length + lacc, // if true, use Cuda mem-/exec-spaces + acc_async, // [deprecated] use async acc + nlev, nblks_e, nblks_v + ); + +} + +void rbf_interpol_c2grad_lib_sp(const float* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, + const float* rbf_c2grad_coeff, float* grad_x, float* grad_y, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){ + + rbf_interpol_c2grad_lib<float>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, + rbf_c2grad_coeff, + grad_x, grad_y, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, slev, elev, + nproma, rbf_c2grad_dim, nlev, nblk_c, lacc); +} + +void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, + const double* rbf_c2grad_coeff, double* grad_x, double* grad_y, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){ + + rbf_interpol_c2grad_lib<double>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, + rbf_c2grad_coeff, + grad_x, grad_y, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, slev, elev, + nproma, rbf_c2grad_dim, nlev, nblk_c, lacc); +} + +void rbf_vec_interpol_cell_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, + const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, + int rbf_vec_dim_c, 
bool lacc, bool acc_async){ + + rbf_vec_interpol_cell_lib<float>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, + rbf_vec_coeff_c, p_u_out, p_v_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, + slev, elev, nproma, nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async); +} + +void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, + const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, + int rbf_vec_dim_c, bool lacc, bool acc_async){ + + rbf_vec_interpol_cell_lib<double>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c, + p_u_out, p_v_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in, + slev, elev, nproma, nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async); +} + +void rbf_vec_interpol_edge_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, + const double* rbf_vec_coeff_e, double* p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async){ + + rbf_vec_interpol_edge_lib<double>(p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e, + rbf_vec_coeff_e, p_vt_out, + i_startblk, i_endblk, i_startidx_in,i_endidx_in, + slev, elev, nlev, nproma, rbf_vec_dim_e, nblks_e, lacc, acc_async); +} + +void rbf_vec_interpol_edge_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, + const float* rbf_vec_coeff_e, float* p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async){ + + rbf_vec_interpol_edge_lib<float>(p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e, + rbf_vec_coeff_e, p_vt_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, + slev, elev, nlev, nproma, rbf_vec_dim_e, 
nblks_e,lacc, acc_async); +} diff --git a/src/interpolation/interpolation_bindings.h b/src/interpolation/interpolation_bindings.h index 7cb873d..419446a 100644 --- a/src/interpolation/interpolation_bindings.h +++ b/src/interpolation/interpolation_bindings.h @@ -185,4 +185,82 @@ void cell_avg_lib_sp(const float *psi_c, const int *cell_neighbor_idx, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const bool lacc); + +void rbf_vec_interpol_vertex_lib_dp( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + double* p_u_out, + double* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + +void rbf_vec_interpol_vertex_lib_sp( + const float* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const float* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const 
int nblks_e, const int nblks_v + ); + +void rbf_interpol_c2grad_lib_sp( + const float* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, + const float* rbf_c2grad_coeff, float* grad_x, const float* grad_y, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); + +void rbf_interpol_c2grad_lib_dp( + const double* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, + const double* rbf_c2grad_coeff, double* grad_x, const double* grad_y, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); + +void rbf_vec_interpol_cell_lib_sp( + const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, + const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, + int rbf_vec_dim_c, bool lacc, bool acc_async); + +void rbf_vec_interpol_cell_lib_dp( + const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, + const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, + int rbf_vec_dim_c, bool lacc, bool acc_async); + +void rbf_vec_interpol_edge_lib_dp( + const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, + const double* rbf_vec_coeff_e, double* p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async); + +void rbf_vec_interpol_edge_lib_sp( + const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, + const float* rbf_vec_coeff_e, float* p_vt_out, + int i_startblk, int i_endblk, int 
i_startidx_in, int i_endidx_in, + int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async); } diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.cpp b/src/interpolation/mo_lib_intp_rbf_bindings.cpp deleted file mode 100644 index e8461e6..0000000 --- a/src/interpolation/mo_lib_intp_rbf_bindings.cpp +++ /dev/null @@ -1,204 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - - -#include "mo_lib_intp_rbf_bindings.h" -#include "mo_lib_intp_rbf.hpp" - -void rbf_vec_interpol_vertex_lib_dp( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - double* p_u_out, - double* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ) -{ - rbf_vec_interpol_vertex_lib<double, double>( - p_e_in, - rbf_vec_idx_v, - rbf_vec_blk_v, - rbf_vec_coeff_v, - p_u_out, - p_v_out, - i_startblk, // start_block needed for get_indices_c_lib - i_endblk, // end_block needed for get_indices_c_lib - i_startidx_in, // start_index needed for get_indices_c_lib - i_endidx_in, // end_index needed for get_indices_c_lib - slev, // vertical 
start level - elev, // vertical end level - nproma, // inner loop length/vector length - lacc, // if true, use Cuda mem-/exec-spaces - acc_async, // [deprecated] use async acc - nlev, nblks_e, nblks_v - ); -} - -void rbf_vec_interpol_vertex_lib_sp( - const float* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const float* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ) -{ - rbf_vec_interpol_vertex_lib<float, float>( - p_e_in, - rbf_vec_idx_v, - rbf_vec_blk_v, - rbf_vec_coeff_v, - p_u_out, - p_v_out, - i_startblk, // start_block needed for get_indices_c_lib - i_endblk, // end_block needed for get_indices_c_lib - i_startidx_in, // start_index needed for get_indices_c_lib - i_endidx_in, // end_index needed for get_indices_c_lib - slev, // vertical start level - elev, // vertical end level - nproma, // inner loop length/vector length - lacc, // if true, use Cuda mem-/exec-spaces - acc_async, // [deprecated] use async acc - nlev, nblks_e, nblks_v - ); - -} - -void rbf_vec_interpol_vertex_lib_mixprec( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // 
end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ) -{ - rbf_vec_interpol_vertex_lib<double, float>( - p_e_in, - rbf_vec_idx_v, - rbf_vec_blk_v, - rbf_vec_coeff_v, - p_u_out, - p_v_out, - i_startblk, // start_block needed for get_indices_c_lib - i_endblk, // end_block needed for get_indices_c_lib - i_startidx_in, // start_index needed for get_indices_c_lib - i_endidx_in, // end_index needed for get_indices_c_lib - slev, // vertical start level - elev, // vertical end level - nproma, // inner loop length/vector length - lacc, // if true, use Cuda mem-/exec-spaces - acc_async, // [deprecated] use async acc - nlev, nblks_e, nblks_v - ); - -} - -void rbf_interpol_c2grad_lib_sp(const float* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, - const float* rbf_c2grad_coeff, float* grad_x, float* grad_y, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){ - - rbf_interpol_c2grad_lib<float>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, - rbf_c2grad_coeff, - grad_x, grad_y, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, slev, elev, - nproma, rbf_c2grad_dim, nlev, nblk_c, lacc); -} - -void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, - const double* rbf_c2grad_coeff, double* grad_x, double* grad_y, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){ - - rbf_interpol_c2grad_lib<double>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, - rbf_c2grad_coeff, - grad_x, grad_y, i_startblk, i_endblk, - i_startidx_in, 
i_endidx_in, slev, elev, - nproma, rbf_c2grad_dim, nlev, nblk_c, lacc); -} - -void rbf_vec_interpol_cell_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, - const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, - int rbf_vec_dim_c, bool lacc, bool acc_async){ - - rbf_vec_interpol_cell_lib<float>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, - rbf_vec_coeff_c, p_u_out, p_v_out, - i_startblk, i_endblk, i_startidx_in, i_endidx_in, - slev, elev, nproma, nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async); -} - -void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, - const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, - int rbf_vec_dim_c, bool lacc, bool acc_async){ - - rbf_vec_interpol_cell_lib<double>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c, - p_u_out, p_v_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in, - slev, elev, nproma, nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async); -} - -void rbf_vec_interpol_edge_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, - const double* rbf_vec_coeff_e, double* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_async){ - - rbf_vec_interpol_edge_lib<double>(p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e, - rbf_vec_coeff_e, p_vt_out, - i_startblk, i_endblk, i_startidx_in,i_endidx_in, - slev, elev, nlev, nproma, rbf_vec_dim_e, nblks_e, lacc, acc_async); -} - -void rbf_vec_interpol_edge_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, - const float* 
rbf_vec_coeff_e, float* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_async){ - - rbf_vec_interpol_edge_lib<float>(p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e, - rbf_vec_coeff_e, p_vt_out, - i_startblk, i_endblk, i_startidx_in, i_endidx_in, - slev, elev, nlev, nproma, rbf_vec_dim_e, nblks_e,lacc, acc_async); -} diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.h b/src/interpolation/mo_lib_intp_rbf_bindings.h deleted file mode 100644 index fb8e87c..0000000 --- a/src/interpolation/mo_lib_intp_rbf_bindings.h +++ /dev/null @@ -1,92 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- -#pragma once - -extern "C" { - -void rbf_vec_interpol_vertex_lib_dp( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - double* p_u_out, - double* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); - -void rbf_vec_interpol_vertex_lib_sp( - const float* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const float* 
rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); - -void rbf_interpol_c2grad_lib_sp( - const float* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, - const float* rbf_c2grad_coeff, float* grad_x, const float* grad_y, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); - -void rbf_interpol_c2grad_lib_dp( - const double* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, - const double* rbf_c2grad_coeff, double* grad_x, const double* grad_y, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); - -void rbf_vec_interpol_cell_lib_sp( - const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, - const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, - int rbf_vec_dim_c, bool lacc, bool acc_async); - -void rbf_vec_interpol_cell_lib_dp( - const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, - const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblks_c, int 
nblks_e, - int rbf_vec_dim_c, bool lacc, bool acc_async); - -void rbf_vec_interpol_edge_lib_dp( - const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, - const double* rbf_vec_coeff_e, double* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_async); - -void rbf_vec_interpol_edge_lib_sp( - const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, - const float* rbf_vec_coeff_e, float* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_async); -} -- GitLab From ed5c21b6247e11166e6d27b3bc5d3741a5664117 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Thu, 6 Mar 2025 14:15:28 +0100 Subject: [PATCH 28/33] made changes to test_intp_rbf --- test/c/test_intp_rbf.cpp | 70 +++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp index 924ff02..983352c 100644 --- a/test/c/test_intp_rbf.cpp +++ b/test/c/test_intp_rbf.cpp @@ -39,38 +39,43 @@ typedef ::testing::Types< MixedPrecision<double, double>, MixedPrecision<double, float>, MixedPrecision<float, float> > MixedTypes; -// Define a typed test fixture for the functions which have the same input and output types -template <typename T> -class RbfInterpolTypedTestFixture : public ::testing::Test { +class interp_dimensions { public: - - static constexpr int nproma = 3; // inner loop length - static constexpr int nlev = 4; // vertical levels - static constexpr int nblks_c = 2; // number of blocks for the cell arrays - static constexpr int nblks_e = 2; - static constexpr int rbf_c2grad_dim = 10; // fixed dimension for the RBF c2 gradient + // Constant dimensions. 
+ static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 4; // number of vertical levels + static constexpr int nblks_c = 2; // number of cell blocks + static constexpr int nblks_e = 2; // number of edge blocks + static constexpr int nblks_v = 2; // number of vertex blocks + static constexpr int rbf_c2grad_dim = 10; // fixed dimension static constexpr int rbf_vec_dim_c = 9; static constexpr int rbf_vec_dim_e = 4; - // Parameters for the index ranges. - int i_startblk = 0; - int i_endblk = 1; // Note: loop over jb uses jb < i_endblk, so blocks 0 and 1 - int i_startidx_in = 0; - int i_endidx_in = nproma-1; // [0, nproma) i.e. 0 .. nproma-1 - int slev = 0; - int elev = nlev-1; - bool lacc = false; // Not using any ACC-specific behavior. - bool acc_async = false; + // Parameter values. + const int i_startblk = 0; + const int i_endblk = 1; // Test blocks [0, 1] + const int i_startidx_in = 0; + const int i_endidx_in = nproma - 1; + const int slev = 0; + const int elev = nlev - 1; + const bool lacc = false; // Not using ACC-specific behavior. + const bool acc_async = false; // No asynchronous execution. +}; + +// Define a typed test fixture for the functions which have the same input and output types +template <typename T> +class RbfInterpolTypedTestFixture : public ::testing::Test, public interp_dimensions { +public: // Data arrays. 
- std::vector<T> p_cell_in; // size: nproma * nlev * nblks_c - std::vector<int> rbf_c2grad_idx; // size: rbf_c2grad_dim * nproma * nblks_c - std::vector<int> rbf_c2grad_blk; // size: rbf_c2grad_dim * nproma * nblks_c - std::vector<int> rbf_vec_idx_c; //size: rbf_vec_dim_c * nproma * nblks_c - std::vector<int> rbf_vec_blk_c; // size: rbf_vec_dim_c * nproma * nblks_c - std::vector<T> rbf_c2grad_coeff; // size: rbf_c2grad_dim * 2 * nproma * nblks_c - std::vector<T> grad_x; // size: nproma * nlev * nblks_c - std::vector<T> grad_y; // size: nproma * nlev * nblks_c + std::vector<T> p_cell_in; // size: nproma * nlev * nblks_c + std::vector<int> rbf_c2grad_idx; // size: rbf_c2grad_dim * nproma * nblks_c + std::vector<int> rbf_c2grad_blk; // size: rbf_c2grad_dim * nproma * nblks_c + std::vector<int> rbf_vec_idx_c; //size: rbf_vec_dim_c * nproma * nblks_c + std::vector<int> rbf_vec_blk_c; // size: rbf_vec_dim_c * nproma * nblks_c + std::vector<T> rbf_c2grad_coeff; // size: rbf_c2grad_dim * 2 * nproma * nblks_c + std::vector<T> grad_x; // size: nproma * nlev * nblks_c + std::vector<T> grad_y; // size: nproma * nlev * nblks_c std::vector<T> p_vn_in; std::vector<T> rbf_vec_coeff_c; std::vector<T> p_u_out; @@ -120,7 +125,7 @@ typedef ::testing::Types<float, double> MyTypes; TYPED_TEST_SUITE(RbfInterpolTypedTestFixture, MyTypes); -TYPED_TEST(RbfInterpolTypedTestFixture, BasicTest) { +TYPED_TEST(RbfInterpolTypedTestFixture, C2Grad) { using T = TypeParam; rbf_interpol_c2grad_lib<TypeParam>( this->p_cell_in.data(), @@ -158,7 +163,7 @@ TYPED_TEST(RbfInterpolTypedTestFixture, BasicTest) { } -TYPED_TEST(RbfInterpolTypedTestFixture, cell_lib) { +TYPED_TEST(RbfInterpolTypedTestFixture, Cell) { using T = TypeParam; rbf_vec_interpol_cell_lib<T>( @@ -188,15 +193,12 @@ TYPED_TEST(RbfInterpolTypedTestFixture, cell_lib) { size_t idx = i + static_cast<size_t>(jk) * this->nproma + static_cast<size_t>(jb) * this->nproma * this->nlev; EXPECT_NEAR(this->p_u_out[idx], 
static_cast<T>(this->rbf_vec_dim_c), static_cast<T>(1e-5)) << "p_u_out failure at block " << jb << ", level " << jk << ", index " << i; - // EXPECT_NEAR(this->p_v_out[idx], static_cast<T>(this->rbf_vec_dim_c), static_cast<T>(1e-5)) - // << "p_v_out failure at block " << jb << ", level " << jk << ", index " << i; } } } } - -TYPED_TEST(RbfInterpolTypedTestFixture, edge_lib){ +TYPED_TEST(RbfInterpolTypedTestFixture, Egde){ using T = TypeParam; @@ -234,7 +236,7 @@ TYPED_TEST(RbfInterpolTypedTestFixture, edge_lib){ // Define a typed test fixture for the functions which have different input and output types template <typename TypePair> -class RbfVecInterpolMixedTestFixture : public ::testing::Test { +class RbfVecInterpolMixedTestFixture : public ::testing::Test, public interp_dimensions { public: using InType = typename TypePair::in_type; using OutType = typename TypePair::out_type; @@ -279,7 +281,7 @@ public: TYPED_TEST_SUITE(RbfVecInterpolMixedTestFixture, MixedTypes); -TYPED_TEST(RbfVecInterpolMixedTestFixture, BasicTest) { +TYPED_TEST(RbfVecInterpolMixedTestFixture, Vertex) { using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; -- GitLab From eb4096e188f939d5dc311c095cc403a8257a27fe Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Thu, 6 Mar 2025 14:19:33 +0100 Subject: [PATCH 29/33] made few changes to test_interpolation_scalar --- test/c/test_interpolation_scalar.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp index 0ee7fa3..507ec3f 100644 --- a/test/c/test_interpolation_scalar.cpp +++ b/test/c/test_interpolation_scalar.cpp @@ -48,8 +48,7 @@ public: static constexpr int nlev = 7; // number of vertical levels static constexpr int nblks_c = 2; // number of cell blocks static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) - static constexpr int nblks_v = - 2; // number of vertex 
blocks (for rbf arrays and outputs) + static constexpr int nblks_v = 2; // number of vertex blocks // Parameter values. const int i_startblk = 0; @@ -385,7 +384,7 @@ TYPED_TEST_SUITE(InterpolationScalarMixedTestFixture, MixedTypesSP2DP); // //////////////////////////////////////////////////////////////////////////////// -TYPED_TEST(InterpolationScalarMixedTestFixture, cells2edges) { +TYPED_TEST(InterpolationScalarMixedTestFixture, Cells2Edges) { using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; @@ -424,7 +423,7 @@ TYPED_TEST(InterpolationScalarMixedTestFixture, cells2edges) { // //////////////////////////////////////////////////////////////////////////////// -TYPED_TEST(InterpolationScalarMixedTestFixture, cells2verts) { +TYPED_TEST(InterpolationScalarMixedTestFixture, Cells2Verts) { using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; @@ -496,7 +495,7 @@ public: TYPED_TEST_SUITE(Cells2vertsriScalarLibTestFixture, MixedTypes); // Add test -TYPED_TEST(Cells2vertsriScalarLibTestFixture, cells2verts_ri) { +TYPED_TEST(Cells2vertsriScalarLibTestFixture, Cells2VertsRI) { using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; -- GitLab From 1d49b1bde48b2f43dd65c63c04517e4e7332b761 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Thu, 6 Mar 2025 15:07:23 +0100 Subject: [PATCH 30/33] formatted the c codes using clang-format --- src/interpolation/interpolation_bindings.cpp | 262 +++--- src/interpolation/interpolation_bindings.h | 103 +-- src/interpolation/mo_lib_intp_rbf.cpp | 798 ++++++++++--------- src/interpolation/mo_lib_intp_rbf.hpp | 56 +- test/c/test_intp_rbf.cpp | 319 ++++---- 5 files changed, 732 insertions(+), 806 deletions(-) diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp index 75449d6..a13ad0f 100644 --- a/src/interpolation/interpolation_bindings.cpp +++ 
b/src/interpolation/interpolation_bindings.cpp @@ -329,191 +329,121 @@ void cell_avg_lib_sp(const float *psi_c, const int *cell_neighbor_idx, } void rbf_vec_interpol_vertex_lib_dp( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - double* p_u_out, - double* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ) -{ - rbf_vec_interpol_vertex_lib<double, double>( - p_e_in, - rbf_vec_idx_v, - rbf_vec_blk_v, - rbf_vec_coeff_v, - p_u_out, - p_v_out, - i_startblk, // start_block needed for get_indices_c_lib - i_endblk, // end_block needed for get_indices_c_lib - i_startidx_in, // start_index needed for get_indices_c_lib - i_endidx_in, // end_index needed for get_indices_c_lib - slev, // vertical start level - elev, // vertical end level - nproma, // inner loop length/vector length - lacc, // if true, use Cuda mem-/exec-spaces - acc_async, // [deprecated] use async acc - nlev, nblks_e, nblks_v - ); + const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const double *rbf_vec_coeff_v, double *p_u_out, double *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v) { + rbf_vec_interpol_vertex_lib<double, double>( + p_e_in, rbf_vec_idx_v, rbf_vec_blk_v, rbf_vec_coeff_v, p_u_out, 
p_v_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + lacc, acc_async, nlev, nblks_e, nblks_v); } void rbf_vec_interpol_vertex_lib_sp( - const float* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const float* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ) -{ - rbf_vec_interpol_vertex_lib<float, float>( - p_e_in, - rbf_vec_idx_v, - rbf_vec_blk_v, - rbf_vec_coeff_v, - p_u_out, - p_v_out, - i_startblk, // start_block needed for get_indices_c_lib - i_endblk, // end_block needed for get_indices_c_lib - i_startidx_in, // start_index needed for get_indices_c_lib - i_endidx_in, // end_index needed for get_indices_c_lib - slev, // vertical start level - elev, // vertical end level - nproma, // inner loop length/vector length - lacc, // if true, use Cuda mem-/exec-spaces - acc_async, // [deprecated] use async acc - nlev, nblks_e, nblks_v - ); - + const float *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const float *rbf_vec_coeff_v, float *p_u_out, float *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v) { + rbf_vec_interpol_vertex_lib<float, float>( + p_e_in, rbf_vec_idx_v, rbf_vec_blk_v, rbf_vec_coeff_v, p_u_out, p_v_out, + i_startblk, 
i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + lacc, acc_async, nlev, nblks_e, nblks_v); } void rbf_vec_interpol_vertex_lib_mixprec( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ) -{ - rbf_vec_interpol_vertex_lib<double, float>( - p_e_in, - rbf_vec_idx_v, - rbf_vec_blk_v, - rbf_vec_coeff_v, - p_u_out, - p_v_out, - i_startblk, // start_block needed for get_indices_c_lib - i_endblk, // end_block needed for get_indices_c_lib - i_startidx_in, // start_index needed for get_indices_c_lib - i_endidx_in, // end_index needed for get_indices_c_lib - slev, // vertical start level - elev, // vertical end level - nproma, // inner loop length/vector length - lacc, // if true, use Cuda mem-/exec-spaces - acc_async, // [deprecated] use async acc - nlev, nblks_e, nblks_v - ); - + const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const double *rbf_vec_coeff_v, float *p_u_out, float *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v) { + rbf_vec_interpol_vertex_lib<double, float>( + p_e_in, rbf_vec_idx_v, rbf_vec_blk_v, rbf_vec_coeff_v, p_u_out, p_v_out, + i_startblk, i_endblk, 
i_startidx_in, i_endidx_in, slev, elev, nproma, + lacc, acc_async, nlev, nblks_e, nblks_v); } -void rbf_interpol_c2grad_lib_sp(const float* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, - const float* rbf_c2grad_coeff, float* grad_x, float* grad_y, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){ - - rbf_interpol_c2grad_lib<float>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, - rbf_c2grad_coeff, - grad_x, grad_y, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, slev, elev, - nproma, rbf_c2grad_dim, nlev, nblk_c, lacc); +void rbf_interpol_c2grad_lib_sp(const float *p_cell_in, + const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, + const float *rbf_c2grad_coeff, float *grad_x, + float *grad_y, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int rbf_c2grad_dim, + int nlev, int nblk_c, bool lacc) { + + rbf_interpol_c2grad_lib<float>( + p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, rbf_c2grad_coeff, grad_x, + grad_y, i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, + nproma, rbf_c2grad_dim, nlev, nblk_c, lacc); } -void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, - const double* rbf_c2grad_coeff, double* grad_x, double* grad_y, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){ - - rbf_interpol_c2grad_lib<double>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, - rbf_c2grad_coeff, - grad_x, grad_y, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, slev, elev, - nproma, rbf_c2grad_dim, nlev, nblk_c, lacc); +void rbf_interpol_c2grad_lib_dp(const double *p_cell_in, + const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, + const double *rbf_c2grad_coeff, double *grad_x, + double *grad_y, int i_startblk, int i_endblk, + int 
i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int rbf_c2grad_dim, + int nlev, int nblk_c, bool lacc) { + + rbf_interpol_c2grad_lib<double>( + p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, rbf_c2grad_coeff, grad_x, + grad_y, i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, + nproma, rbf_c2grad_dim, nlev, nblk_c, lacc); } -void rbf_vec_interpol_cell_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, - const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, - int rbf_vec_dim_c, bool lacc, bool acc_async){ +void rbf_vec_interpol_cell_lib_sp( + const float *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c, + const float *rbf_vec_coeff_c, float *p_u_out, float *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, + bool lacc, bool acc_async) { - rbf_vec_interpol_cell_lib<float>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, - rbf_vec_coeff_c, p_u_out, p_v_out, - i_startblk, i_endblk, i_startidx_in, i_endidx_in, - slev, elev, nproma, nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async); + rbf_vec_interpol_cell_lib<float>( + p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c, p_u_out, p_v_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async); } -void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, - const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, - int rbf_vec_dim_c, bool lacc, bool acc_async){ +void rbf_vec_interpol_cell_lib_dp( + const double *p_vn_in, const int 
*rbf_vec_idx_c, const int *rbf_vec_blk_c, + const double *rbf_vec_coeff_c, double *p_u_out, double *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, + bool lacc, bool acc_async) { - rbf_vec_interpol_cell_lib<double>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c, - p_u_out, p_v_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in, - slev, elev, nproma, nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async); + rbf_vec_interpol_cell_lib<double>( + p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c, p_u_out, p_v_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async); } -void rbf_vec_interpol_edge_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, - const double* rbf_vec_coeff_e, double* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_async){ - - rbf_vec_interpol_edge_lib<double>(p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e, - rbf_vec_coeff_e, p_vt_out, - i_startblk, i_endblk, i_startidx_in,i_endidx_in, - slev, elev, nlev, nproma, rbf_vec_dim_e, nblks_e, lacc, acc_async); +void rbf_vec_interpol_edge_lib_dp( + const double *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e, + const double *rbf_vec_coeff_e, double *p_vt_out, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, + int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async) { + + rbf_vec_interpol_edge_lib<double>( + p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e, rbf_vec_coeff_e, p_vt_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nlev, + nproma, rbf_vec_dim_e, nblks_e, lacc, acc_async); } -void rbf_vec_interpol_edge_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_e, 
const int* rbf_vec_blk_e, - const float* rbf_vec_coeff_e, float* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_async){ +void rbf_vec_interpol_edge_lib_sp( + const float *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e, + const float *rbf_vec_coeff_e, float *p_vt_out, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nlev, + int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, bool acc_async) { - rbf_vec_interpol_edge_lib<float>(p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e, - rbf_vec_coeff_e, p_vt_out, - i_startblk, i_endblk, i_startidx_in, i_endidx_in, - slev, elev, nlev, nproma, rbf_vec_dim_e, nblks_e,lacc, acc_async); + rbf_vec_interpol_edge_lib<float>( + p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e, rbf_vec_coeff_e, p_vt_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nlev, + nproma, rbf_vec_dim_e, nblks_e, lacc, acc_async); } diff --git a/src/interpolation/interpolation_bindings.h b/src/interpolation/interpolation_bindings.h index 419446a..0f4778d 100644 --- a/src/interpolation/interpolation_bindings.h +++ b/src/interpolation/interpolation_bindings.h @@ -187,80 +187,59 @@ void cell_avg_lib_sp(const float *psi_c, const int *cell_neighbor_idx, const int nlev, const int nblks_c, const bool lacc); void rbf_vec_interpol_vertex_lib_dp( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - double* p_u_out, - double* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector 
length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); + const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const double *rbf_vec_coeff_v, double *p_u_out, double *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v); void rbf_vec_interpol_vertex_lib_sp( - const float* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const float* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); + const float *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const float *rbf_vec_coeff_v, float *p_u_out, float *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v); void rbf_interpol_c2grad_lib_sp( - const float* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, - const float* rbf_c2grad_coeff, float* grad_x, const float* grad_y, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int 
rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); + const float *p_cell_in, const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, const float *rbf_c2grad_coeff, float *grad_x, + const float *grad_y, int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim, + int nlev, int nblk_c, bool lacc); void rbf_interpol_c2grad_lib_dp( - const double* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, - const double* rbf_c2grad_coeff, double* grad_x, const double* grad_y, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc); + const double *p_cell_in, const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, const double *rbf_c2grad_coeff, double *grad_x, + const double *grad_y, int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim, + int nlev, int nblk_c, bool lacc); void rbf_vec_interpol_cell_lib_sp( - const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, - const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, - int rbf_vec_dim_c, bool lacc, bool acc_async); + const float *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c, + const float *rbf_vec_coeff_c, float *p_u_out, float *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, + bool lacc, bool acc_async); void rbf_vec_interpol_cell_lib_dp( - const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, - const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int 
nblks_c, int nblks_e, - int rbf_vec_dim_c, bool lacc, bool acc_async); + const double *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c, + const double *rbf_vec_coeff_c, double *p_u_out, double *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, + bool lacc, bool acc_async); void rbf_vec_interpol_edge_lib_dp( - const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, - const double* rbf_vec_coeff_e, double* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_async); + const double *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e, + const double *rbf_vec_coeff_e, double *p_vt_out, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, + int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async); void rbf_vec_interpol_edge_lib_sp( - const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, - const float* rbf_vec_coeff_e, float* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_async); + const float *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e, + const float *rbf_vec_coeff_e, float *p_vt_out, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nlev, + int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, bool acc_async); } diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp index 361710f..d1178a6 100644 --- a/src/interpolation/mo_lib_intp_rbf.cpp +++ b/src/interpolation/mo_lib_intp_rbf.cpp @@ -30,404 +30,446 @@ constexpr int rbf_vec_dim_v = 6; /// precision(T) >= precision(S) template <typename T, 
typename S> void rbf_vec_interpol_vertex_lib( - const T* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const T* rbf_vec_coeff_v, - S* p_u_out, - S* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc + const T *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const T *rbf_vec_coeff_v, S *p_u_out, S *p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc // Dimensions for the arrays. - const int nlev, const int nblks_e, const int nblks_v - ) -{ - /* + const int nlev, const int nblks_e, const int nblks_v) { + /* #ifdef DIM_ENABLE_GPU - if (lacc){ using MemSpace = Kokkos::CudaSpace; - } else { using MemSpace = Kokkos::HostSpace; } + if (lacc){ using MemSpace = Kokkos::CudaSpace; + } else { using MemSpace = Kokkos::HostSpace; } #else - using MemSpace = Kokkos::HostSpace; + using MemSpace = Kokkos::HostSpace; #endif - */ - - // Wrap raw pointers in unmanaged Kokkos Views. 
- typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<const T****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D; - - - - // input components of velocity or horizontal vorticity vectors at edge midpoints - // dim: (nproma,nlev,nblks_e) - UnmanagedConstT3D p_e_in_view(p_e_in, nproma, nlev, nblks_e); - - // index array defining the stencil of surrounding edges for vector rbf interpolation at each triangle vertex - // (rbf_vec_dim_v,nproma,nblks_v) - UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v); - UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v); - - // coefficients are working precision array containing the coefficients used for vector rbf interpolation - // at each tringle vertex (input is normal component), - // dim: (rbf_vec_dim_v,2,nproma,nblks_v) - UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v); - - // reconstructed x-component (u) of velocity vector, - // dim: (nproma,nlev,nblks_v) - UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v); - // reconstructed y-component (v) of velocity vector, - // dim: (nproma,nlev,nblks_v) - UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v); - - // Local vars - //int jv, jk, jb; // integer over vertices, levels, and blocks, - int jb; // integer over vertices, levels, and blocks, - int i_startidx; // start index - int i_endidx; // end index - - for (jb=i_startblk; jb <= i_endblk; ++jb){ - - get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, - i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); - - 
Kokkos::parallel_for("rbf_vec_interpol_vertex_lib", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jv) { - - // NOTE: Static indexes reduced by 1 from Fortran version - p_u_out_view(jv, jk, jb) = - ptr_coeff_view(0, 0, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + - ptr_coeff_view(1, 0, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + - ptr_coeff_view(2, 0, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + - ptr_coeff_view(3, 0, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + - ptr_coeff_view(4, 0, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + - ptr_coeff_view(5, 0, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); - p_v_out_view(jv, jk, jb) = - ptr_coeff_view(0, 1, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + - ptr_coeff_view(1, 1, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + - ptr_coeff_view(2, 1, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + - ptr_coeff_view(3, 1, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + - ptr_coeff_view(4, 1, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + - ptr_coeff_view(5, 1, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); - } - ); - } + */ + + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + typedef Kokkos::View<S ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedS3D; + + // input components of velocity or horizontal vorticity vectors at edge + // midpoints dim: (nproma,nlev,nblks_e) + UnmanagedConstT3D p_e_in_view(p_e_in, nproma, nlev, nblks_e); + + // index array defining the stencil of surrounding edges for vector rbf + // interpolation at each triangle vertex (rbf_vec_dim_v,nproma,nblks_v) + UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v); + UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v); + + // coefficients are working precision array containing the coefficients used + // for vector rbf interpolation at each tringle vertex (input is normal + // component), dim: (rbf_vec_dim_v,2,nproma,nblks_v) + UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, + nblks_v); + + // reconstructed x-component (u) of velocity vector, + // dim: (nproma,nlev,nblks_v) + UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v); + // reconstructed y-component (v) of velocity vector, + // dim: (nproma,nlev,nblks_v) + UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v); + + // Local vars + // int jv, jk, jb; // integer over vertices, levels, and blocks, + int jb; // integer over vertices, levels, and blocks, + int i_startidx; // start index + int i_endidx; // end index + + for (jb = i_startblk; jb <= i_endblk; ++jb) { + + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + 
"rbf_vec_interpol_vertex_lib", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jv) { + // NOTE: Static indexes reduced by 1 from Fortran version + p_u_out_view(jv, jk, jb) = + ptr_coeff_view(0, 0, jv, jb) * + p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + + ptr_coeff_view(1, 0, jv, jb) * + p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + + ptr_coeff_view(2, 0, jv, jb) * + p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + + ptr_coeff_view(3, 0, jv, jb) * + p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + + ptr_coeff_view(4, 0, jv, jb) * + p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + + ptr_coeff_view(5, 0, jv, jb) * + p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); + p_v_out_view(jv, jk, jb) = + ptr_coeff_view(0, 1, jv, jb) * + p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + + ptr_coeff_view(1, 1, jv, jb) * + p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + + ptr_coeff_view(2, 1, jv, jb) * + p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + + ptr_coeff_view(3, 1, jv, jb) * + p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + + ptr_coeff_view(4, 1, jv, jb) * + p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + + ptr_coeff_view(5, 1, jv, jb) * + p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); + }); + } } template <typename T> -void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, - const T* rbf_c2grad_coeff, T* grad_x, T* grad_y, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc){ - - //aliases for unmanaged Kokkos views - typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; - typedef Kokkos::View<const int***, 
Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - typedef Kokkos::View<const T****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; - - //to avoid memory ownership issues - UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c); - UnmanagedT3D grad_x_view(grad_x, nproma, nlev, nblks_c); - UnmanagedT3D grad_y_view(grad_y, nproma, nlev, nblks_c); - UnmanagedConstInt3D rbf_c2grad_idx_view(rbf_c2grad_idx, rbf_c2grad_dim, nproma, nblks_c); - UnmanagedConstInt3D rbf_c2grad_blk_view(rbf_c2grad_blk, rbf_c2grad_dim, nproma, nblks_c); - UnmanagedConstT4D rbf_c2grad_coeff_view(rbf_c2grad_coeff, rbf_c2grad_dim, 2, nproma, nblks_c); - - for (int jb = i_startblk; jb <= i_endblk; ++jb) { - - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); - - Kokkos::parallel_for("rbf_interpol_c2grad", - Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev+1, i_endidx+1}), - KOKKOS_LAMBDA(const int jk, const int jc){ - - grad_x_view(jc, jk, jb) = - rbf_c2grad_coeff_view(0, 1, jc, jb)* - p_cell_in_view(jc, jk, jb) + - rbf_c2grad_coeff_view(1, 1, jc, jb)* - p_cell_in_view(rbf_c2grad_idx_view(1, jc, jb), jk, rbf_c2grad_blk_view(1, jc, jb)) + - rbf_c2grad_coeff_view(2, 1, jc, jb)* - p_cell_in_view(rbf_c2grad_idx_view(2, jc, jb), jk, rbf_c2grad_blk_view(2, jc, jb)) + - rbf_c2grad_coeff_view(3, 1, jc, jb)* - p_cell_in_view(rbf_c2grad_idx_view(3, jc, jb), jk, rbf_c2grad_blk_view(3, jc, jb)) + - rbf_c2grad_coeff_view(4, 1, jc, jb)* - p_cell_in_view(rbf_c2grad_idx_view(4, jc, jb), jk, rbf_c2grad_blk_view(4, jc, jb)) + - rbf_c2grad_coeff_view(5, 1, jc, jb)* - p_cell_in_view(rbf_c2grad_idx_view(5, jc, jb), jk, rbf_c2grad_blk_view(5, jc, jb)) + - rbf_c2grad_coeff_view(6, 1, jc, jb)* - p_cell_in_view(rbf_c2grad_idx_view(6, jc, jb), jk, rbf_c2grad_blk_view(6, jc, jb)) + - rbf_c2grad_coeff_view(7, 1, jc, jb)* - p_cell_in_view(rbf_c2grad_idx_view(7, jc, jb), jk, 
rbf_c2grad_blk_view(7, jc, jb)) + - rbf_c2grad_coeff_view(8, 1, jc, jb)* - p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk, rbf_c2grad_blk_view(8, jc, jb)) + - rbf_c2grad_coeff_view(9, 1, jc, jb)* - p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk, rbf_c2grad_blk_view(9, jc, jb)); - - grad_y_view(jc, jk, jb) = - rbf_c2grad_coeff_view(0, 2, jc, jb)* - p_cell_in_view(jc, jk, jb) + - rbf_c2grad_coeff_view(1, 2, jc, jb)* - p_cell_in_view(rbf_c2grad_idx_view(1, jc, jb), jk, rbf_c2grad_blk_view(1, jc, jb)) + - rbf_c2grad_coeff_view(2, 2, jc, jb)* - p_cell_in_view(rbf_c2grad_idx_view(2, jc, jb), jk, rbf_c2grad_blk_view(2, jc, jb)) + - rbf_c2grad_coeff_view(3, 2, jc, jb)* - p_cell_in_view(rbf_c2grad_idx_view(3, jc, jb), jk, rbf_c2grad_blk_view(3, jc, jb)) + - rbf_c2grad_coeff_view(4, 2, jc, jb)* - p_cell_in_view(rbf_c2grad_idx_view(4, jc, jb), jk, rbf_c2grad_blk_view(4, jc, jb)) + - rbf_c2grad_coeff_view(5, 2, jc, jb)* - p_cell_in_view(rbf_c2grad_idx_view(5, jc, jb), jk, rbf_c2grad_blk_view(5, jc, jb)) + - rbf_c2grad_coeff_view(6, 2, jc, jb)* - p_cell_in_view(rbf_c2grad_idx_view(6, jc, jb), jk, rbf_c2grad_blk_view(6, jc, jb)) + - rbf_c2grad_coeff_view(7, 2, jc, jb)* - p_cell_in_view(rbf_c2grad_idx_view(7, jc, jb), jk, rbf_c2grad_blk_view(7, jc, jb)) + - rbf_c2grad_coeff_view(8, 2, jc, jb)* - p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk, rbf_c2grad_blk_view(8, jc, jb)) + - rbf_c2grad_coeff_view(9, 2, jc, jb)* - p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk, rbf_c2grad_blk_view(9, jc, jb)); - }); - - }//for -}//void - +void rbf_interpol_c2grad_lib(const T *p_cell_in, const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, + const T *rbf_c2grad_coeff, T *grad_x, T *grad_y, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + int rbf_c2grad_dim, int nlev, int nblks_c, + bool lacc) { + + // aliases for unmanaged Kokkos views + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + 
UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + + // to avoid memory ownership issues + UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c); + UnmanagedT3D grad_x_view(grad_x, nproma, nlev, nblks_c); + UnmanagedT3D grad_y_view(grad_y, nproma, nlev, nblks_c); + UnmanagedConstInt3D rbf_c2grad_idx_view(rbf_c2grad_idx, rbf_c2grad_dim, + nproma, nblks_c); + UnmanagedConstInt3D rbf_c2grad_blk_view(rbf_c2grad_blk, rbf_c2grad_dim, + nproma, nblks_c); + UnmanagedConstT4D rbf_c2grad_coeff_view(rbf_c2grad_coeff, rbf_c2grad_dim, 2, + nproma, nblks_c); + + for (int jb = i_startblk; jb <= i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "rbf_interpol_c2grad", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + grad_x_view(jc, jk, jb) = + rbf_c2grad_coeff_view(0, 1, jc, jb) * p_cell_in_view(jc, jk, jb) + + rbf_c2grad_coeff_view(1, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(1, jc, jb), jk, + rbf_c2grad_blk_view(1, jc, jb)) + + rbf_c2grad_coeff_view(2, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(2, jc, jb), jk, + rbf_c2grad_blk_view(2, jc, jb)) + + rbf_c2grad_coeff_view(3, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(3, jc, jb), jk, + rbf_c2grad_blk_view(3, jc, jb)) + + rbf_c2grad_coeff_view(4, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(4, jc, jb), jk, + rbf_c2grad_blk_view(4, jc, jb)) + + rbf_c2grad_coeff_view(5, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(5, jc, jb), jk, + rbf_c2grad_blk_view(5, jc, jb)) + + 
rbf_c2grad_coeff_view(6, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(6, jc, jb), jk, + rbf_c2grad_blk_view(6, jc, jb)) + + rbf_c2grad_coeff_view(7, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(7, jc, jb), jk, + rbf_c2grad_blk_view(7, jc, jb)) + + rbf_c2grad_coeff_view(8, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk, + rbf_c2grad_blk_view(8, jc, jb)) + + rbf_c2grad_coeff_view(9, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk, + rbf_c2grad_blk_view(9, jc, jb)); + + grad_y_view(jc, jk, jb) = + rbf_c2grad_coeff_view(0, 2, jc, jb) * p_cell_in_view(jc, jk, jb) + + rbf_c2grad_coeff_view(1, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(1, jc, jb), jk, + rbf_c2grad_blk_view(1, jc, jb)) + + rbf_c2grad_coeff_view(2, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(2, jc, jb), jk, + rbf_c2grad_blk_view(2, jc, jb)) + + rbf_c2grad_coeff_view(3, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(3, jc, jb), jk, + rbf_c2grad_blk_view(3, jc, jb)) + + rbf_c2grad_coeff_view(4, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(4, jc, jb), jk, + rbf_c2grad_blk_view(4, jc, jb)) + + rbf_c2grad_coeff_view(5, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(5, jc, jb), jk, + rbf_c2grad_blk_view(5, jc, jb)) + + rbf_c2grad_coeff_view(6, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(6, jc, jb), jk, + rbf_c2grad_blk_view(6, jc, jb)) + + rbf_c2grad_coeff_view(7, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(7, jc, jb), jk, + rbf_c2grad_blk_view(7, jc, jb)) + + rbf_c2grad_coeff_view(8, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk, + rbf_c2grad_blk_view(8, jc, jb)) + + rbf_c2grad_coeff_view(9, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk, + rbf_c2grad_blk_view(9, jc, jb)); + }); + + } // for +} // void //------------------------------------------rbf_vec_interpol_cell_lib--------------------------------------------- template <typename T> -void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* 
rbf_vec_idx_c, const int* rbf_vec_blk_c, - const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, - int rbf_vec_dim_c, bool lacc, bool acc_async){ - - typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - typedef Kokkos::View<const T****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; - - UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e); - UnmanagedConstInt3D rbf_vec_idx_c_view(rbf_vec_idx_c, rbf_vec_dim_c, nproma, nblks_c); - UnmanagedConstInt3D rbf_vec_blk_c_view(rbf_vec_blk_c, rbf_vec_dim_c, nproma, nblks_c); - UnmanagedConstT4D rbf_vec_coeff_c_view(rbf_vec_coeff_c, nproma, nblks_c); //TODO - UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblks_c); - UnmanagedT3D p_v_out_view(p_u_out, nproma, nlev, nblks_c); - - - for (int jb = i_startblk; jb <= i_endblk; ++jb) { - - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); - - Kokkos::parallel_for("rbf_vec_interpol_cell_lib", - Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev+1, i_endidx+1}), - KOKKOS_LAMBDA(const int jk, const int jc){ - - p_u_out_view(jc, jk, jb) = - rbf_vec_coeff_c_view(0, 1, jc, jb)* - p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk, rbf_vec_blk_c_view(0, jc, jb)) + - rbf_vec_coeff_c_view(1, 1, jc, jb)* - p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk, rbf_vec_blk_c_view(1, jc, jb)) + - rbf_vec_coeff_c_view(2, 1, jc, jb)* - p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk, rbf_vec_blk_c_view(2, jc, jb)) + - rbf_vec_coeff_c_view(3, 1, jc, jb)* - p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk, 
rbf_vec_blk_c_view(3, jc, jb)) + - rbf_vec_coeff_c_view(4, 1, jc, jb)* - p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk, rbf_vec_blk_c_view(4, jc, jb)) + - rbf_vec_coeff_c_view(5, 1, jc, jb)* - p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk, rbf_vec_blk_c_view(5, jc, jb)) + - rbf_vec_coeff_c_view(6, 1, jc, jb)* - p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk, rbf_vec_blk_c_view(6, jc, jb)) + - rbf_vec_coeff_c_view(7, 1, jc, jb)* - p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk, rbf_vec_blk_c_view(7, jc, jb)) + - rbf_vec_coeff_c_view(8, 1, jc, jb)* - p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, rbf_vec_blk_c_view(8, jc, jb)); - - p_v_out_view(jc, jk, jb) = - rbf_vec_coeff_c_view(0, 2, jc, jb)* - p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk, rbf_vec_blk_c_view(0, jc, jb)) + - rbf_vec_coeff_c_view(1, 2, jc, jb)* - p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk, rbf_vec_blk_c_view(1, jc, jb)) + - rbf_vec_coeff_c_view(2, 2, jc, jb)* - p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk, rbf_vec_blk_c_view(2, jc, jb)) + - rbf_vec_coeff_c_view(3, 2, jc, jb)* - p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk, rbf_vec_blk_c_view(3, jc, jb)) + - rbf_vec_coeff_c_view(4, 2, jc, jb)* - p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk, rbf_vec_blk_c_view(4, jc, jb)) + - rbf_vec_coeff_c_view(5, 2, jc, jb)* - p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk, rbf_vec_blk_c_view(5, jc, jb)) + - rbf_vec_coeff_c_view(6, 2, jc, jb)* - p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk, rbf_vec_blk_c_view(6, jc, jb)) + - rbf_vec_coeff_c_view(7, 2, jc, jb)* - p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk, rbf_vec_blk_c_view(7, jc, jb)) + - rbf_vec_coeff_c_view(8, 2, jc, jb)* - p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, rbf_vec_blk_c_view(8, jc, jb)); - }); - Kokkos::fence(); - }//for -}//void +void rbf_vec_interpol_cell_lib(const T *p_vn_in, const int *rbf_vec_idx_c, + const int *rbf_vec_blk_c, + const T *rbf_vec_coeff_c, T *p_u_out, T *p_v_out, + int i_startblk, int i_endblk, int 
i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + int nlev, int nblks_c, int nblks_e, + int rbf_vec_dim_c, bool lacc, bool acc_async) { + + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + + UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e); + UnmanagedConstInt3D rbf_vec_idx_c_view(rbf_vec_idx_c, rbf_vec_dim_c, nproma, + nblks_c); + UnmanagedConstInt3D rbf_vec_blk_c_view(rbf_vec_blk_c, rbf_vec_dim_c, nproma, + nblks_c); + UnmanagedConstT4D rbf_vec_coeff_c_view(rbf_vec_coeff_c, nproma, + nblks_c); // TODO + UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblks_c); + UnmanagedT3D p_v_out_view(p_u_out, nproma, nlev, nblks_c); + + for (int jb = i_startblk; jb <= i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "rbf_vec_interpol_cell_lib", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + p_u_out_view(jc, jk, jb) = + rbf_vec_coeff_c_view(0, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk, + rbf_vec_blk_c_view(0, jc, jb)) + + rbf_vec_coeff_c_view(1, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk, + rbf_vec_blk_c_view(1, jc, jb)) + + rbf_vec_coeff_c_view(2, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk, + rbf_vec_blk_c_view(2, jc, jb)) + + rbf_vec_coeff_c_view(3, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk, + rbf_vec_blk_c_view(3, jc, jb)) + + rbf_vec_coeff_c_view(4, 1, jc, jb) * + 
p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk, + rbf_vec_blk_c_view(4, jc, jb)) + + rbf_vec_coeff_c_view(5, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk, + rbf_vec_blk_c_view(5, jc, jb)) + + rbf_vec_coeff_c_view(6, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk, + rbf_vec_blk_c_view(6, jc, jb)) + + rbf_vec_coeff_c_view(7, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk, + rbf_vec_blk_c_view(7, jc, jb)) + + rbf_vec_coeff_c_view(8, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, + rbf_vec_blk_c_view(8, jc, jb)); + + p_v_out_view(jc, jk, jb) = + rbf_vec_coeff_c_view(0, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk, + rbf_vec_blk_c_view(0, jc, jb)) + + rbf_vec_coeff_c_view(1, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk, + rbf_vec_blk_c_view(1, jc, jb)) + + rbf_vec_coeff_c_view(2, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk, + rbf_vec_blk_c_view(2, jc, jb)) + + rbf_vec_coeff_c_view(3, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk, + rbf_vec_blk_c_view(3, jc, jb)) + + rbf_vec_coeff_c_view(4, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk, + rbf_vec_blk_c_view(4, jc, jb)) + + rbf_vec_coeff_c_view(5, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk, + rbf_vec_blk_c_view(5, jc, jb)) + + rbf_vec_coeff_c_view(6, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk, + rbf_vec_blk_c_view(6, jc, jb)) + + rbf_vec_coeff_c_view(7, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk, + rbf_vec_blk_c_view(7, jc, jb)) + + rbf_vec_coeff_c_view(8, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, + rbf_vec_blk_c_view(8, jc, jb)); + }); + Kokkos::fence(); + } // for +} // void //------------------------------------------rbf_vec_interpol_edge_lib--------------------------------------------- template <typename T> -void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const int* 
rbf_vec_blk_e, - const T* rbf_vec_coeff_e, T* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_async){ - - typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - - UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma,nlev,nblks_e); - UnmanagedConstInt3D rbf_vec_idx_e_view(rbf_vec_idx_e, rbf_vec_dim_e,nproma,nblks_e); - UnmanagedConstInt3D rbf_vec_blk_e_view(rbf_vec_blk_e, rbf_vec_dim_e,nproma,nblks_e); - UnmanagedConstT3D rbf_vec_coeff_e_view(rbf_vec_coeff_e, rbf_vec_dim_e,nproma,nblks_e); - UnmanagedT3D p_vt_out_view(p_vt_out, nproma,nlev,nblks_e); - - - for (int jb = i_startblk; jb <= i_endblk; ++jb) { - - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); - - Kokkos::parallel_for("rbf_vec_interpol_edge_lib", - Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev+1, i_endidx+1}), - KOKKOS_LAMBDA(const int jk, const int je){ - - p_vt_out_view(je, jk, jb) = - rbf_vec_coeff_e_view(0, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(0, je, jb), jk, rbf_vec_blk_e_view(0, je, jb)) + - rbf_vec_coeff_e_view(1, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(1, je, jb), jk, rbf_vec_blk_e_view(1, je, jb)) + - rbf_vec_coeff_e_view(2, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(2, je, jb), jk, rbf_vec_blk_e_view(2, je, jb)) + - rbf_vec_coeff_e_view(3, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(3, je, jb), jk, rbf_vec_blk_e_view(3, je, jb)); - - }); - } +void rbf_vec_interpol_edge_lib(const T *p_vn_in, const int *rbf_vec_idx_e, + const int *rbf_vec_blk_e, + const T *rbf_vec_coeff_e, T *p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, + int 
i_endidx_in, int slev, int elev, int nlev, + int nproma, int rbf_vec_dim_e, int nblks_e, + bool lacc, bool acc_async) { + + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e); + UnmanagedConstInt3D rbf_vec_idx_e_view(rbf_vec_idx_e, rbf_vec_dim_e, nproma, + nblks_e); + UnmanagedConstInt3D rbf_vec_blk_e_view(rbf_vec_blk_e, rbf_vec_dim_e, nproma, + nblks_e); + UnmanagedConstT3D rbf_vec_coeff_e_view(rbf_vec_coeff_e, rbf_vec_dim_e, nproma, + nblks_e); + UnmanagedT3D p_vt_out_view(p_vt_out, nproma, nlev, nblks_e); + + for (int jb = i_startblk; jb <= i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "rbf_vec_interpol_edge_lib", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int je) { + p_vt_out_view(je, jk, jb) = + rbf_vec_coeff_e_view(0, je, jb) * + p_vn_in_view(rbf_vec_idx_e_view(0, je, jb), jk, + rbf_vec_blk_e_view(0, je, jb)) + + rbf_vec_coeff_e_view(1, je, jb) * + p_vn_in_view(rbf_vec_idx_e_view(1, je, jb), jk, + rbf_vec_blk_e_view(1, je, jb)) + + rbf_vec_coeff_e_view(2, je, jb) * + p_vn_in_view(rbf_vec_idx_e_view(2, je, jb), jk, + rbf_vec_blk_e_view(2, je, jb)) + + rbf_vec_coeff_e_view(3, je, jb) * + p_vn_in_view(rbf_vec_idx_e_view(3, je, jb), jk, + rbf_vec_blk_e_view(3, je, jb)); + }); + } } // Explicit instantiation - double precision -template -void rbf_vec_interpol_vertex_lib<double, double>( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - double* 
p_u_out, - double* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); +template void rbf_vec_interpol_vertex_lib<double, double>( + const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const double *rbf_vec_coeff_v, double *p_u_out, double *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v); // Explicit instantiation - single precision -template -void rbf_vec_interpol_vertex_lib<float, float>( - const float* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const float* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); +template void rbf_vec_interpol_vertex_lib<float, float>( + const float *p_e_in, const int *rbf_vec_idx_v, const int 
*rbf_vec_blk_v, + const float *rbf_vec_coeff_v, float *p_u_out, float *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v); // Explicit instantiation - mixed precision -template -void rbf_vec_interpol_vertex_lib<double, float>( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); - -template -void rbf_vec_interpol_cell_lib<double>( - const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, - const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, - int rbf_vec_dim_c, bool lacc, bool acc_async -); - -template -void rbf_vec_interpol_cell_lib<float>( - const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, - const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, - int rbf_vec_dim_c, bool lacc, bool acc_async -); - -template -void rbf_interpol_c2grad_lib<double>( - const double* p_cell_in, 
const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, - const double* rbf_c2grad_coeff, double* grad_x, double* grad_y, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc -); - -template -void rbf_interpol_c2grad_lib<float>( - const float* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, - const float* rbf_c2grad_coeff, float* grad_x, float* grad_y, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc -); - -template -void rbf_vec_interpol_edge_lib<double>( - const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, - const double* rbf_vec_coeff_e, double* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_async -); - -template -void rbf_vec_interpol_edge_lib<float>( - const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, - const float* rbf_vec_coeff_e, float* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_async -); +template void rbf_vec_interpol_vertex_lib<double, float>( + const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const double *rbf_vec_coeff_v, float *p_u_out, float *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v); + +template void rbf_vec_interpol_cell_lib<double>( + const double *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c, + const double *rbf_vec_coeff_c, double *p_u_out, double *p_v_out, + int i_startblk, 
int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, + bool lacc, bool acc_async); + +template void rbf_vec_interpol_cell_lib<float>( + const float *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c, + const float *rbf_vec_coeff_c, float *p_u_out, float *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, + bool lacc, bool acc_async); + +template void rbf_interpol_c2grad_lib<double>( + const double *p_cell_in, const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, const double *rbf_c2grad_coeff, double *grad_x, + double *grad_y, int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim, + int nlev, int nblks_c, bool lacc); + +template void rbf_interpol_c2grad_lib<float>( + const float *p_cell_in, const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, const float *rbf_c2grad_coeff, float *grad_x, + float *grad_y, int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim, + int nlev, int nblks_c, bool lacc); + +template void rbf_vec_interpol_edge_lib<double>( + const double *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e, + const double *rbf_vec_coeff_e, double *p_vt_out, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, + int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async); + +template void rbf_vec_interpol_edge_lib<float>( + const float *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e, + const float *rbf_vec_coeff_e, float *p_vt_out, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nlev, + int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, bool acc_async); diff --git 
a/src/interpolation/mo_lib_intp_rbf.hpp b/src/interpolation/mo_lib_intp_rbf.hpp index 53547fc..8a85502 100644 --- a/src/interpolation/mo_lib_intp_rbf.hpp +++ b/src/interpolation/mo_lib_intp_rbf.hpp @@ -16,41 +16,35 @@ template <typename T, typename S> void rbf_vec_interpol_vertex_lib( - const T* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const T* rbf_vec_coeff_v, - S* p_u_out, - S* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_c - ); + const T *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const T *rbf_vec_coeff_v, S *p_u_out, S *p_v_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const bool lacc, + const bool acc_async, const int nlev, const int nblks_e, const int nblks_c); template <typename T> -void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk, - const T* rbf_c2grad_coeff, T* grad_x, T* grad_y, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc); - +void rbf_interpol_c2grad_lib(const T *p_cell_in, const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, + const T *rbf_c2grad_coeff, T *grad_x, T *grad_y, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + int rbf_c2grad_dim, int nlev, int nblks_c, + 
bool lacc); template <typename T> -void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c, - const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e, +void rbf_vec_interpol_cell_lib(const T *p_vn_in, const int *rbf_vec_idx_c, + const int *rbf_vec_blk_c, + const T *rbf_vec_coeff_c, T *p_u_out, T *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async); template <typename T> -void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e, - const T* rbf_vec_coeff_e, T* p_vt_out, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, - bool acc_async); +void rbf_vec_interpol_edge_lib(const T *p_vn_in, const int *rbf_vec_idx_e, + const int *rbf_vec_blk_e, + const T *rbf_vec_coeff_e, T *p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nlev, + int nproma, int rbf_vec_dim_e, int nblks_e, + bool lacc, bool acc_async); diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp index 983352c..cdee264 100644 --- a/test/c/test_intp_rbf.cpp +++ b/test/c/test_intp_rbf.cpp @@ -9,45 +9,43 @@ // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- -#include <gtest/gtest.h> +#include "mo_lib_intp_rbf.hpp" #include <Kokkos_Core.hpp> -#include <vector> #include <algorithm> +#include <gtest/gtest.h> #include <numeric> -#include "mo_lib_intp_rbf.hpp" +#include <vector> // Free-function helpers for 3D and 4D array sizes (assumed column-major) -template<typename T> -size_t num_elements_3d(int d1, int d2, int d3) { +template <typename 
T> size_t num_elements_3d(int d1, int d2, int d3) { return static_cast<size_t>(d1) * d2 * d3; } -template<typename T> -size_t num_elements_4d(int d1, int d2, int d3, int d4) { +template <typename T> size_t num_elements_4d(int d1, int d2, int d3, int d4) { return static_cast<size_t>(d1) * d2 * d3 * d4; } // Define a helper struct that holds the two types. -template<typename InT, typename OutT> -struct MixedPrecision { - using in_type = InT; +template <typename InT, typename OutT> struct MixedPrecision { + using in_type = InT; using out_type = OutT; }; // Define the list of type pairs we want to test. -typedef ::testing::Types< MixedPrecision<double, double>, - MixedPrecision<double, float>, - MixedPrecision<float, float> > MixedTypes; +typedef ::testing::Types<MixedPrecision<double, double>, + MixedPrecision<double, float>, + MixedPrecision<float, float>> + MixedTypes; class interp_dimensions { public: // Constant dimensions. - static constexpr int nproma = 3; // inner loop length - static constexpr int nlev = 4; // number of vertical levels - static constexpr int nblks_c = 2; // number of cell blocks - static constexpr int nblks_e = 2; // number of edge blocks - static constexpr int nblks_v = 2; // number of vertex blocks - static constexpr int rbf_c2grad_dim = 10; // fixed dimension + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 4; // number of vertical levels + static constexpr int nblks_c = 2; // number of cell blocks + static constexpr int nblks_e = 2; // number of edge blocks + static constexpr int nblks_v = 2; // number of vertex blocks + static constexpr int rbf_c2grad_dim = 10; // fixed dimension static constexpr int rbf_vec_dim_c = 9; static constexpr int rbf_vec_dim_e = 4; @@ -58,22 +56,24 @@ public: const int i_endidx_in = nproma - 1; const int slev = 0; const int elev = nlev - 1; - const bool lacc = false; // Not using ACC-specific behavior. + const bool lacc = false; // Not using ACC-specific behavior. 
const bool acc_async = false; // No asynchronous execution. }; -// Define a typed test fixture for the functions which have the same input and output types +// Define a typed test fixture for the functions which have the same input and +// output types template <typename T> -class RbfInterpolTypedTestFixture : public ::testing::Test, public interp_dimensions { +class RbfInterpolTypedTestFixture : public ::testing::Test, + public interp_dimensions { public: - // Data arrays. - std::vector<T> p_cell_in; // size: nproma * nlev * nblks_c + std::vector<T> p_cell_in; // size: nproma * nlev * nblks_c std::vector<int> rbf_c2grad_idx; // size: rbf_c2grad_dim * nproma * nblks_c std::vector<int> rbf_c2grad_blk; // size: rbf_c2grad_dim * nproma * nblks_c - std::vector<int> rbf_vec_idx_c; //size: rbf_vec_dim_c * nproma * nblks_c + std::vector<int> rbf_vec_idx_c; // size: rbf_vec_dim_c * nproma * nblks_c std::vector<int> rbf_vec_blk_c; // size: rbf_vec_dim_c * nproma * nblks_c - std::vector<T> rbf_c2grad_coeff; // size: rbf_c2grad_dim * 2 * nproma * nblks_c + std::vector<T> + rbf_c2grad_coeff; // size: rbf_c2grad_dim * 2 * nproma * nblks_c std::vector<T> grad_x; // size: nproma * nlev * nblks_c std::vector<T> grad_y; // size: nproma * nlev * nblks_c std::vector<T> p_vn_in; @@ -86,17 +86,20 @@ public: std::vector<T> rbf_vec_coeff_e; std::vector<T> p_vt_out; - RbfInterpolTypedTestFixture() { size_t size3d = static_cast<size_t>(nproma) * nlev * nblks_c; size_t size3d_idx = static_cast<size_t>(rbf_c2grad_dim) * nproma * nblks_c; size_t size4d = static_cast<size_t>(rbf_c2grad_dim) * 2 * nproma * nblks_c; - size_t size3d_vec_dim = static_cast<size_t>(rbf_vec_dim_c) * nproma * nblks_c; - size_t size_4d_vec_dim = static_cast<size_t>(rbf_vec_dim_c) * 2 * nproma * nblks_c; + size_t size3d_vec_dim = + static_cast<size_t>(rbf_vec_dim_c) * nproma * nblks_c; + size_t size_4d_vec_dim = + static_cast<size_t>(rbf_vec_dim_c) * 2 * nproma * nblks_c; - size_t size3d_edge_lib = 
static_cast<size_t>(rbf_vec_dim_e) * nproma * nblks_c; - size_t size_4d_edge_lib = static_cast<size_t>(rbf_vec_dim_e) * 2 * nproma * nblks_c; + size_t size3d_edge_lib = + static_cast<size_t>(rbf_vec_dim_e) * nproma * nblks_c; + size_t size_4d_edge_lib = + static_cast<size_t>(rbf_vec_dim_e) * 2 * nproma * nblks_c; p_cell_in.resize(size3d, static_cast<T>(1)); p_vn_in.resize(size3d, static_cast<T>(1)); @@ -104,7 +107,7 @@ public: rbf_vec_idx_c.resize(size3d_vec_dim, 1); rbf_vec_blk_c.resize(size3d_vec_dim, 0); rbf_c2grad_idx.resize(size3d_idx, 1); - rbf_c2grad_blk.resize(size3d_idx, 0); // Set block indices to 0 for testing. + rbf_c2grad_blk.resize(size3d_idx, 0); // Set block indices to 0 for testing. rbf_vec_idx_e.resize(size3d_vec_dim, 1); rbf_vec_blk_e.resize(size3d_vec_dim, 0); @@ -128,23 +131,12 @@ TYPED_TEST_SUITE(RbfInterpolTypedTestFixture, MyTypes); TYPED_TEST(RbfInterpolTypedTestFixture, C2Grad) { using T = TypeParam; rbf_interpol_c2grad_lib<TypeParam>( - this->p_cell_in.data(), - this->rbf_c2grad_idx.data(), - this->rbf_c2grad_blk.data(), - this->rbf_c2grad_coeff.data(), - this->grad_x.data(), - this->grad_y.data(), - this->i_startblk, - this->i_endblk, - this->i_startidx_in, - this->i_endidx_in, - this->slev, - this->elev, - this->nproma, - this->rbf_c2grad_dim, - this->nlev, - this->nblks_c, - this->lacc); + this->p_cell_in.data(), this->rbf_c2grad_idx.data(), + this->rbf_c2grad_blk.data(), this->rbf_c2grad_coeff.data(), + this->grad_x.data(), this->grad_y.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->rbf_c2grad_dim, this->nlev, this->nblks_c, + this->lacc); // For each block from i_startblk to i_endblk-1, and for each (i, level) // the kernel sums rbf_c2grad_dim contributions, each equal to 1. 
@@ -152,171 +144,160 @@ TYPED_TEST(RbfInterpolTypedTestFixture, C2Grad) { for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { for (int jk = 0; jk < this->nlev; ++jk) { for (int i = 0; i < this->nproma; ++i) { - size_t idx = i + static_cast<size_t>(jk) * this->nproma + static_cast<size_t>(jb) * this->nproma * this->nlev; - EXPECT_NEAR(this->grad_x[idx], static_cast<TypeParam>(this->rbf_c2grad_dim), static_cast<TypeParam>(1e-5)) - << "grad_x failure at block " << jb << ", level " << jk << ", index " << i; - EXPECT_NEAR(this->grad_y[idx], static_cast<TypeParam>(this->rbf_c2grad_dim), static_cast<TypeParam>(1e-5)) - << "grad_y failure at block " << jb << ", level " << jk << ", index " << i; + size_t idx = i + static_cast<size_t>(jk) * this->nproma + + static_cast<size_t>(jb) * this->nproma * this->nlev; + EXPECT_NEAR(this->grad_x[idx], + static_cast<TypeParam>(this->rbf_c2grad_dim), + static_cast<TypeParam>(1e-5)) + << "grad_x failure at block " << jb << ", level " << jk + << ", index " << i; + EXPECT_NEAR(this->grad_y[idx], + static_cast<TypeParam>(this->rbf_c2grad_dim), + static_cast<TypeParam>(1e-5)) + << "grad_y failure at block " << jb << ", level " << jk + << ", index " << i; } } } } - TYPED_TEST(RbfInterpolTypedTestFixture, Cell) { - using T = TypeParam; - - rbf_vec_interpol_cell_lib<T>( - this->p_vn_in.data(), - this->rbf_vec_idx_c.data(), - this->rbf_vec_blk_c.data(), - this->rbf_vec_coeff_c.data(), - this->p_u_out.data(), - this->p_v_out.data(), - this->i_startblk, - this->i_endblk, - this->i_startidx_in, - this->i_endidx_in, - this->slev, - this->elev, - this->nproma, - this->rbf_c2grad_dim, - this->nlev, - this->nblks_c, - this->nblks_e, - this->lacc, - this->acc_async); - - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - for (int jk = 0; jk < this->nlev; ++jk) { - for (int i = 0; i < this->nproma; ++i) { - size_t idx = i + static_cast<size_t>(jk) * this->nproma + static_cast<size_t>(jb) * this->nproma * this->nlev; - 
EXPECT_NEAR(this->p_u_out[idx], static_cast<T>(this->rbf_vec_dim_c), static_cast<T>(1e-5)) - << "p_u_out failure at block " << jb << ", level " << jk << ", index " << i; - } - } + using T = TypeParam; + + rbf_vec_interpol_cell_lib<T>( + this->p_vn_in.data(), this->rbf_vec_idx_c.data(), + this->rbf_vec_blk_c.data(), this->rbf_vec_coeff_c.data(), + this->p_u_out.data(), this->p_v_out.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->rbf_c2grad_dim, this->nlev, this->nblks_c, + this->nblks_e, this->lacc, this->acc_async); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + for (int jk = 0; jk < this->nlev; ++jk) { + for (int i = 0; i < this->nproma; ++i) { + size_t idx = i + static_cast<size_t>(jk) * this->nproma + + static_cast<size_t>(jb) * this->nproma * this->nlev; + EXPECT_NEAR(this->p_u_out[idx], static_cast<T>(this->rbf_vec_dim_c), + static_cast<T>(1e-5)) + << "p_u_out failure at block " << jb << ", level " << jk + << ", index " << i; } + } } +} -TYPED_TEST(RbfInterpolTypedTestFixture, Egde){ - using T = TypeParam; - - - rbf_vec_interpol_edge_lib<T>( - this->p_vn_in.data(), - this->rbf_vec_idx_e.data(), - this->rbf_vec_blk_e.data(), - this->rbf_vec_coeff_e.data(), - this->p_vt_out.data(), - this->i_startblk, - this->i_endblk, - this->i_startidx_in, - this->i_endidx_in, - this->slev, - this->elev, - this->nlev, - this->nproma, - this->rbf_vec_dim_e, - this->nblks_e, - this->lacc, - this->acc_async); - - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - for (int jk = 0; jk < this->nlev; ++jk) { - for (int i = 0; i < this->nproma; ++i) { - size_t idx = i + static_cast<size_t>(jk) * this->nproma + static_cast<size_t>(jb) * this->nproma * this->nlev; - EXPECT_NEAR(this->p_vt_out[idx], static_cast<T>(this->rbf_vec_dim_e), static_cast<T>(1e-5)) - << "p_vt_out failure at block " << jb << ", level " << jk << ", index " << i; - } - } - } 
+TYPED_TEST(RbfInterpolTypedTestFixture, Egde) { + using T = TypeParam; -} + rbf_vec_interpol_edge_lib<T>( + this->p_vn_in.data(), this->rbf_vec_idx_e.data(), + this->rbf_vec_blk_e.data(), this->rbf_vec_coeff_e.data(), + this->p_vt_out.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev, this->elev, + this->nlev, this->nproma, this->rbf_vec_dim_e, this->nblks_e, this->lacc, + this->acc_async); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + for (int jk = 0; jk < this->nlev; ++jk) { + for (int i = 0; i < this->nproma; ++i) { + size_t idx = i + static_cast<size_t>(jk) * this->nproma + + static_cast<size_t>(jb) * this->nproma * this->nlev; + EXPECT_NEAR(this->p_vt_out[idx], static_cast<T>(this->rbf_vec_dim_e), + static_cast<T>(1e-5)) + << "p_vt_out failure at block " << jb << ", level " << jk + << ", index " << i; + } + } + } +} -// Define a typed test fixture for the functions which have different input and output types +// Define a typed test fixture for the functions which have different input and +// output types template <typename TypePair> -class RbfVecInterpolMixedTestFixture : public ::testing::Test, public interp_dimensions { +class RbfVecInterpolMixedTestFixture : public ::testing::Test, + public interp_dimensions { public: - using InType = typename TypePair::in_type; + using InType = typename TypePair::in_type; using OutType = typename TypePair::out_type; // Constant dimensions. 
- static constexpr int nproma = 3; // inner loop length - static constexpr int nlev = 4; // number of vertical levels - static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) - static constexpr int nblks_v = 2; // number of vertex blocks (for rbf arrays and outputs) - static constexpr int rbf_vec_dim = 6; // fixed dimension for rbf vector (stencil points) + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 4; // number of vertical levels + static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) + static constexpr int nblks_v = + 2; // number of vertex blocks (for rbf arrays and outputs) + static constexpr int rbf_vec_dim = + 6; // fixed dimension for rbf vector (stencil points) // Parameter values. - int i_startblk = 0; - int i_endblk = 1; // Test blocks [0, 1] + int i_startblk = 0; + int i_endblk = 1; // Test blocks [0, 1] int i_startidx_in = 0; - int i_endidx_in = nproma - 1; // Full range: 0 .. nproma-1 - int slev = 0; - int elev = nlev - 1; // Full vertical range (0 .. nlev-1) - bool lacc = false; // Not using ACC-specific behavior. - bool acc_async = false; // No asynchronous execution. + int i_endidx_in = nproma - 1; // Full range: 0 .. nproma-1 + int slev = 0; + int elev = nlev - 1; // Full vertical range (0 .. nlev-1) + bool lacc = false; // Not using ACC-specific behavior. + bool acc_async = false; // No asynchronous execution. // Arrays stored in std::vector. 
- std::vector<InType> p_e_in; // Dimensions: (nproma, nlev, nblks_e) - std::vector<int> rbf_vec_idx_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) - std::vector<int> rbf_vec_blk_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) - std::vector<InType> rbf_vec_coeff_v; // Dimensions: (rbf_vec_dim, 2, nproma, nblks_v) - std::vector<OutType> p_u_out; // Dimensions: (nproma, nlev, nblks_v) - std::vector<OutType> p_v_out; // Dimensions: (nproma, nlev, nblks_v) + std::vector<InType> p_e_in; // Dimensions: (nproma, nlev, nblks_e) + std::vector<int> rbf_vec_idx_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) + std::vector<int> rbf_vec_blk_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) + std::vector<InType> + rbf_vec_coeff_v; // Dimensions: (rbf_vec_dim, 2, nproma, nblks_v) + std::vector<OutType> p_u_out; // Dimensions: (nproma, nlev, nblks_v) + std::vector<OutType> p_v_out; // Dimensions: (nproma, nlev, nblks_v) RbfVecInterpolMixedTestFixture() { // Allocate and initialize inputs. - p_e_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1)); + p_e_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), + static_cast<InType>(1)); rbf_vec_idx_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 1); rbf_vec_blk_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 0); - rbf_vec_coeff_v.resize(num_elements_4d<InType>(rbf_vec_dim, 2, nproma, nblks_v), static_cast<InType>(1)); + rbf_vec_coeff_v.resize( + num_elements_4d<InType>(rbf_vec_dim, 2, nproma, nblks_v), + static_cast<InType>(1)); // Allocate output arrays and initialize to zero. 
- p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); - p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); + p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), + static_cast<OutType>(0)); + p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), + static_cast<OutType>(0)); } }; TYPED_TEST_SUITE(RbfVecInterpolMixedTestFixture, MixedTypes); TYPED_TEST(RbfVecInterpolMixedTestFixture, Vertex) { - using InType = typename TestFixture::InType; + using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; // Call the function with mixed precision. rbf_vec_interpol_vertex_lib<InType, OutType>( - this->p_e_in.data(), - this->rbf_vec_idx_v.data(), - this->rbf_vec_blk_v.data(), - this->rbf_vec_coeff_v.data(), - this->p_u_out.data(), - this->p_v_out.data(), - this->i_startblk, - this->i_endblk, - this->i_startidx_in, - this->i_endidx_in, - this->slev, - this->elev, - this->nproma, - this->lacc, - this->acc_async, - this->nlev, - this->nblks_e, - this->nblks_v); + this->p_e_in.data(), this->rbf_vec_idx_v.data(), + this->rbf_vec_blk_v.data(), this->rbf_vec_coeff_v.data(), + this->p_u_out.data(), this->p_v_out.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->lacc, this->acc_async, this->nlev, + this->nblks_e, this->nblks_v); // Check the outputs only for blocks in the range [i_startblk, i_endblk]. for (int block = this->i_startblk; block <= this->i_endblk; ++block) { for (int level = 0; level < this->nlev; ++level) { for (int i = 0; i < this->nproma; ++i) { // Compute the linear index for a 3D array in column-major order: - size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 6 stencil points, expect 6. 
- EXPECT_NEAR(this->p_u_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " << i; - EXPECT_NEAR(this->p_v_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " << i; + size_t idx = + i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 6 stencil points, + // expect 6. + EXPECT_NEAR(this->p_u_out[idx], static_cast<OutType>(6), + static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; + EXPECT_NEAR(this->p_v_out[idx], static_cast<OutType>(6), + static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; } } } -- GitLab From 2cbc4234c219596f9f90816724d6cb5555ddea3a Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Thu, 6 Mar 2025 18:36:01 +0100 Subject: [PATCH 31/33] added some comments to the bindings --- src/interpolation/interpolation_bindings.cpp | 11 ++++++++++- src/interpolation/interpolation_bindings.h | 8 ++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp index a13ad0f..4524ad7 100644 --- a/src/interpolation/interpolation_bindings.cpp +++ b/src/interpolation/interpolation_bindings.cpp @@ -328,6 +328,7 @@ void cell_avg_lib_sp(const float *psi_c, const int *cell_neighbor_idx, i_endidx_in, slev, elev, nproma, nlev, nblks_c, lacc); } +// This is the binding for mo_intp_rbf::rbf_vec_interpol_vertex_dp_lib void rbf_vec_interpol_vertex_lib_dp( const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, const double *rbf_vec_coeff_v, double *p_u_out, double *p_v_out, @@ -341,6 +342,7 @@ void rbf_vec_interpol_vertex_lib_dp( lacc, acc_async, nlev, nblks_e, nblks_v); } +// This is the binding for 
mo_intp_rbf::rbf_vec_interpol_vertex_sp_lib void rbf_vec_interpol_vertex_lib_sp( const float *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, const float *rbf_vec_coeff_v, float *p_u_out, float *p_v_out, @@ -354,7 +356,8 @@ void rbf_vec_interpol_vertex_lib_sp( lacc, acc_async, nlev, nblks_e, nblks_v); } -void rbf_vec_interpol_vertex_lib_mixprec( +// This is the binding for mo_intp_rbf::rbf_vec_interpol_vertex_dpsp_lib +void rbf_vec_interpol_vertex_lib_dpsp( const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, const double *rbf_vec_coeff_v, float *p_u_out, float *p_v_out, const int i_startblk, const int i_endblk, const int i_startidx_in, @@ -367,6 +370,7 @@ void rbf_vec_interpol_vertex_lib_mixprec( lacc, acc_async, nlev, nblks_e, nblks_v); } +// This is the binding for mo_intp_rbf::rbf_interpol_c2grad_lib (wp=sp) void rbf_interpol_c2grad_lib_sp(const float *p_cell_in, const int *rbf_c2grad_idx, const int *rbf_c2grad_blk, @@ -382,6 +386,7 @@ void rbf_interpol_c2grad_lib_sp(const float *p_cell_in, nproma, rbf_c2grad_dim, nlev, nblk_c, lacc); } +// This is the binding for mo_intp_rbf::rbf_interpol_c2grad_lib (wp=dp) void rbf_interpol_c2grad_lib_dp(const double *p_cell_in, const int *rbf_c2grad_idx, const int *rbf_c2grad_blk, @@ -397,6 +402,7 @@ void rbf_interpol_c2grad_lib_dp(const double *p_cell_in, nproma, rbf_c2grad_dim, nlev, nblk_c, lacc); } +// This is the binding for mo_intp_rbf::rbf_vec_interpol_cell_lib (wp=sp) void rbf_vec_interpol_cell_lib_sp( const float *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c, const float *rbf_vec_coeff_c, float *p_u_out, float *p_v_out, @@ -410,6 +416,7 @@ void rbf_vec_interpol_cell_lib_sp( nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async); } +// This is the binding for mo_intp_rbf::rbf_vec_interpol_cell_lib (wp=dp) void rbf_vec_interpol_cell_lib_dp( const double *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c, const double *rbf_vec_coeff_c, double *p_u_out,
double *p_v_out, @@ -423,6 +430,7 @@ void rbf_vec_interpol_cell_lib_dp( nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async); } +// This is the binding for mo_intp_rbf::rbf_vec_interpol_edge_lib (wp=dp) void rbf_vec_interpol_edge_lib_dp( const double *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e, const double *rbf_vec_coeff_e, double *p_vt_out, int i_startblk, @@ -436,6 +444,7 @@ void rbf_vec_interpol_edge_lib_dp( nproma, rbf_vec_dim_e, nblks_e, lacc, acc_async); } +// This is the binding for mo_intp_rbf::rbf_vec_interpol_edge_lib (wp=sp) void rbf_vec_interpol_edge_lib_sp( const float *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e, const float *rbf_vec_coeff_e, float *p_vt_out, int i_startblk, int i_endblk, diff --git a/src/interpolation/interpolation_bindings.h b/src/interpolation/interpolation_bindings.h index 0f4778d..64c6a8c 100644 --- a/src/interpolation/interpolation_bindings.h +++ b/src/interpolation/interpolation_bindings.h @@ -202,6 +202,14 @@ void rbf_vec_interpol_vertex_lib_sp( const bool lacc, const bool acc_async, const int nlev, const int nblks_e, const int nblks_v); +void rbf_vec_interpol_vertex_lib_dpsp( + const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const double *rbf_vec_coeff_v, float *p_u_out, float *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v); + void rbf_interpol_c2grad_lib_sp( const float *p_cell_in, const int *rbf_c2grad_idx, const int *rbf_c2grad_blk, const float *rbf_c2grad_coeff, float *grad_x, -- GitLab From 399e08caf80cc42ca7925ae1b7f7f921e9df0aef Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Thu, 6 Mar 2025 18:42:27 +0100 Subject: [PATCH 32/33] fixed a typo --- test/c/test_intp_rbf.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp index cdee264..040d440 100644 --- a/test/c/test_intp_rbf.cpp +++ b/test/c/test_intp_rbf.cpp @@ -186,7 +186,7 @@ TYPED_TEST(RbfInterpolTypedTestFixture, Cell) { } } -TYPED_TEST(RbfInterpolTypedTestFixture, Egde) { +TYPED_TEST(RbfInterpolTypedTestFixture, Edge) { using T = TypeParam; rbf_vec_interpol_edge_lib<T>( -- GitLab From 609c5e5804c1b75ccff24ec3ce53126f0cc1a7d7 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Thu, 6 Mar 2025 18:48:50 +0100 Subject: [PATCH 33/33] Revert "Fixed strange typo" This reverts commit 8175944d3f080ec145d4ce59e2e6fdddb8d6153d. --- src/horizontal/mo_lib_divrot.F90 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/horizontal/mo_lib_divrot.F90 b/src/horizontal/mo_lib_divrot.F90 index c1360f6..a2f2ad9 100644 --- a/src/horizontal/mo_lib_divrot.F90 +++ b/src/horizontal/mo_lib_divrot.F90 @@ -561,7 +561,7 @@ CONTAINS !! !! !LITERATURE !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh -!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys., +!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys., !! 181, 729-752 !! SUBROUTINE recon_lsq_cell_q_lib(p_cc, lsq_idx_c, lsq_blk_c, & @@ -800,7 +800,7 @@ CONTAINS !! !! !LITERATURE !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh -!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys., +!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys., !! 181, 729-752 !! SUBROUTINE recon_lsq_cell_q_svd_lib(p_cc, lsq_idx_c, lsq_blk_c, & @@ -1008,7 +1008,7 @@ CONTAINS !! !! !LITERATURE !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh -!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys., +!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys., !! 181, 729-752 !! 
SUBROUTINE recon_lsq_cell_c_lib(p_cc, lsq_idx_c, lsq_blk_c, & @@ -1295,7 +1295,7 @@ CONTAINS !! !! !LITERATURE !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh -!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys., +!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys., !! 181, 729-752 !! SUBROUTINE recon_lsq_cell_c_svd_lib(p_cc, lsq_idx_c, lsq_blk_c, & -- GitLab