From 0ed4e08e45bb73927f1496fcbbbb57d5b99d2767 Mon Sep 17 00:00:00 2001 From: Harshada Balasubramanian <harshada.balasubramanian@mpimet.mpg.de> Date: Mon, 24 Feb 2025 11:53:11 +0100 Subject: [PATCH 01/36] structure --- src/interpolation/CMakeLists.txt | 3 + .../mo_lib_interpolation_vector.cpp | 167 +++++++----------- .../mo_lib_interpolation_vector.hpp | 33 ++-- test/c/test_interpolation_vector.cpp | 5 +- 4 files changed, 84 insertions(+), 124 deletions(-) diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index 37c3ad0..01a20eb 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -13,10 +13,13 @@ add_library( iconmath-interpolation mo_lib_interpolation_scalar.F90 mo_lib_interpolation_vector.F90 + mo_lib_interpolation_vector.hpp mo_lib_interpolation_vector.cpp mo_lib_intp_rbf.F90 mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp + interpolation_bindings.h + interpolation_bindings.cpp ) add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation) diff --git a/src/interpolation/mo_lib_interpolation_vector.cpp b/src/interpolation/mo_lib_interpolation_vector.cpp index 00a914a..40ef003 100644 --- a/src/interpolation/mo_lib_interpolation_vector.cpp +++ b/src/interpolation/mo_lib_interpolation_vector.cpp @@ -1,24 +1,7 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -#include "mo_lib_loopindices.hpp" #include "mo_lib_interpolation_vector.hpp" -// The templated C++ function using Kokkos. -// Raw pointer arguments are wrapped into unmanaged Kokkos::Views. 
-// Note: The dimensions below must match the Fortran arrays. -// - p_vn_in and p_vt_in: dimensions [nproma, nlev, nblks_e] -// - cell_edge_idx and cell_edge_blk: dimensions [nproma, nblks_c, 3] -// - e_bln_c_u and e_bln_c_v: dimensions [nproma, 6, nblks_c] -// - p_u_out and p_v_out: dimensions [nproma, nlev, nblks_c] + + template <typename T> void edges2cells_vector_lib( const T* p_vn_in, const T* p_vt_in, @@ -33,109 +16,95 @@ void edges2cells_vector_lib( // Dimensions for the arrays. int nlev, int nblks_e, int nblks_c) { - // Wrap raw pointers in unmanaged Kokkos Views. - typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e); - UnmanagedConstT3D p_vt_in_view(p_vt_in, nproma, nlev, nblks_e); + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + + + UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e); + UnmanagedConstT3D p_vt_in_view(p_vt_in, nproma, nlev, nblks_e); - UnmanagedConstInt3D cell_edge_idx_view(cell_edge_idx, nproma, nblks_c, 3); - UnmanagedConstInt3D cell_edge_blk_view(cell_edge_blk, nproma, nblks_c, 3); + UnmanagedConstInt3D cell_edge_idx_view(cell_edge_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D cell_edge_blk_view(cell_edge_blk, nproma, nblks_c, 3); - UnmanagedConstT3D e_bln_c_u_view(e_bln_c_u, nproma, 6, nblks_c); - UnmanagedConstT3D e_bln_c_v_view(e_bln_c_v, nproma, 6, nblks_c); + UnmanagedConstT3D e_bln_c_u_view(e_bln_c_u, nproma, 6, nblks_c); + UnmanagedConstT3D e_bln_c_v_view(e_bln_c_v, nproma, 6, nblks_c); - UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblks_c); - UnmanagedT3D p_v_out_view(p_v_out, nproma, nlev, nblks_c); + UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblks_c); + UnmanagedT3D p_v_out_view(p_v_out, nproma, nlev, nblks_c); - // Loop over cell blocks as in the original Fortran code. - for (int jb = i_startblk; jb <= i_endblk; ++jb) { - // Call get_indices_c_lib to get inner loop indices for block jb. - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, - jb, i_startblk, i_endblk, - i_startidx, i_endidx); + // Loop over cell blocks as in the original Fortran code. + for (int jb = i_startblk; jb <= i_endblk; ++jb) { + // Call get_indices_c_lib to get inner loop indices for block jb. 
+ int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, + jb, i_startblk, i_endblk, + i_startidx, i_endidx); - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); - Kokkos::parallel_for("edges2cells_inner", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - // Compute the bilinear interpolation for cell (jc, jk, jb). - p_u_out_view(jc, jk, jb) = - e_bln_c_u_view(jc, 0, jb) * - p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + - e_bln_c_u_view(jc, 1, jb) * - p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + - e_bln_c_u_view(jc, 2, jb) * - p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + - e_bln_c_u_view(jc, 3, jb) * - p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + - e_bln_c_u_view(jc, 4, jb) * - p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1) + - e_bln_c_u_view(jc, 5, jb) * - p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1); + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + Kokkos::parallel_for("edges2cells_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + // Compute the bilinear interpolation for cell (jc, jk, jb). 
+ p_u_out_view(jc, jk, jb) = + e_bln_c_u_view(jc, 0, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + + e_bln_c_u_view(jc, 1, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + + e_bln_c_u_view(jc, 2, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + + e_bln_c_u_view(jc, 3, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + + e_bln_c_u_view(jc, 4, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1) + + e_bln_c_u_view(jc, 5, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1); - p_v_out_view(jc, jk, jb) = - e_bln_c_v_view(jc, 0, jb) * - p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + - e_bln_c_v_view(jc, 1, jb) * - p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + - e_bln_c_v_view(jc, 2, jb) * - p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + - e_bln_c_v_view(jc, 3, jb) * - p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + - e_bln_c_v_view(jc, 4, jb) * - p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1) + - e_bln_c_v_view(jc, 5, jb) * - p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1); - }); - // Optionally fence after each block if required. 
- Kokkos::fence(); - } + p_v_out_view(jc, jk, jb) = + e_bln_c_v_view(jc, 0, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + + e_bln_c_v_view(jc, 1, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + + e_bln_c_v_view(jc, 2, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + + e_bln_c_v_view(jc, 3, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + + e_bln_c_v_view(jc, 4, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1) + + e_bln_c_v_view(jc, 5, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1); + }); + // Optionally fence after each block if required. + Kokkos::fence(); + } } -extern "C" void edges2cells_vector_lib_dp( +template +void edges2cells_vector_lib<double>( const double* p_vn_in, const double* p_vt_in, const int* cell_edge_idx, const int* cell_edge_blk, const double* e_bln_c_u, const double* e_bln_c_v, double* p_u_out, double* p_v_out, + // Additional integer parameters. int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, - int nlev, int nblks_e, int nblks_c) -{ - edges2cells_vector_lib<double>(p_vn_in, p_vt_in, - cell_edge_idx, cell_edge_blk, - e_bln_c_u, e_bln_c_v, - p_u_out, p_v_out, - i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, - nproma, - nlev, nblks_e, nblks_c); -} + // Dimensions for the arrays. + int nlev, int nblks_e, int nblks_c); -extern "C" void edges2cells_vector_lib_sp( +template +void edges2cells_vector_lib<float>( const float* p_vn_in, const float* p_vt_in, const int* cell_edge_idx, const int* cell_edge_blk, const float* e_bln_c_u, const float* e_bln_c_v, float* p_u_out, float* p_v_out, + // Additional integer parameters. 
int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, - int nlev, int nblks_e, int nblks_c) -{ - edges2cells_vector_lib<float>(p_vn_in, p_vt_in, - cell_edge_idx, cell_edge_blk, - e_bln_c_u, e_bln_c_v, - p_u_out, p_v_out, - i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, - nproma, - nlev, nblks_e, nblks_c); -} + // Dimensions for the arrays. + int nlev, int nblks_e, int nblks_c); \ No newline at end of file diff --git a/src/interpolation/mo_lib_interpolation_vector.hpp b/src/interpolation/mo_lib_interpolation_vector.hpp index 0d19b24..1ee6c5b 100644 --- a/src/interpolation/mo_lib_interpolation_vector.hpp +++ b/src/interpolation/mo_lib_interpolation_vector.hpp @@ -8,10 +8,19 @@ // See LICENSES/ for license information // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- +#pragma once +#include "mo_lib_loopindices.hpp" #include <Kokkos_Core.hpp> #include <vector> +// The templated C++ function using Kokkos. +// Raw pointer arguments are wrapped into unmanaged Kokkos::Views. +// Note: The dimensions below must match the Fortran arrays. +// - p_vn_in and p_vt_in: dimensions [nproma, nlev, nblks_e] +// - cell_edge_idx and cell_edge_blk: dimensions [nproma, nblks_c, 3] +// - e_bln_c_u and e_bln_c_v: dimensions [nproma, 6, nblks_c] +// - p_u_out and p_v_out: dimensions [nproma, nlev, nblks_c] template <typename T> void edges2cells_vector_lib( const T* p_vn_in, const T* p_vt_in, @@ -24,26 +33,4 @@ void edges2cells_vector_lib( int slev, int elev, int nproma, // Dimensions for the arrays. 
- int nlev, int nblks_e, int nblks_c); - -extern "C" void edges2cells_vector_lib_dp( - const double* p_vn_in, const double* p_vt_in, - const int* cell_edge_idx, const int* cell_edge_blk, - const double* e_bln_c_u, const double* e_bln_c_v, - double* p_u_out, double* p_v_out, - int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, - int slev, int elev, - int nproma, - int nlev, int nblks_e, int nblks_c); - -extern "C" void edges2cells_vector_lib_sp( - const float* p_vn_in, const float* p_vt_in, - const int* cell_edge_idx, const int* cell_edge_blk, - const float* e_bln_c_u, const float* e_bln_c_v, - float* p_u_out, float* p_v_out, - int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, - int slev, int elev, - int nproma, - int nlev, int nblks_e, int nblks_c); + int nlev, int nblks_e, int nblks_c); \ No newline at end of file diff --git a/test/c/test_interpolation_vector.cpp b/test/c/test_interpolation_vector.cpp index 0eb5a8d..974efcd 100644 --- a/test/c/test_interpolation_vector.cpp +++ b/test/c/test_interpolation_vector.cpp @@ -12,6 +12,7 @@ #include <gtest/gtest.h> #include <Kokkos_Core.hpp> #include <vector> + #include "mo_lib_interpolation_vector.hpp" // Dimensions for the test (small, trivial test). @@ -62,7 +63,7 @@ TEST(Edges2CellsTest, DPTest) { std::vector<double> p_v_ref(num_elements<double>(nproma, nlev, nblks_c), 6.0); // Call the dp (double precision) version. - edges2cells_vector_lib_dp( + edges2cells_vector_lib<double>( p_vn_in.data(), p_vt_in.data(), cell_edge_idx.data(), cell_edge_blk.data(), e_bln_c_u.data(), e_bln_c_v.data(), @@ -103,7 +104,7 @@ TEST(Edges2CellsTest, SPTest) { std::vector<float> p_v_ref(num_elements<float>(nproma, nlev, nblks_c), 6.0f); // Call the sp (float precision) version. 
- edges2cells_vector_lib_sp( + edges2cells_vector_lib<float>( p_vn_in.data(), p_vt_in.data(), cell_edge_idx.data(), cell_edge_blk.data(), e_bln_c_u.data(), e_bln_c_v.data(), -- GitLab From 4b9ef5dbc75f803f4e67adf7d40cddc529c59837 Mon Sep 17 00:00:00 2001 From: Harshada Balasubramanian <harshada.balasubramanian@mpimet.mpg.de> Date: Mon, 24 Feb 2025 12:45:08 +0100 Subject: [PATCH 02/36] fix cmake --- src/interpolation/CMakeLists.txt | 1 - src/interpolation/interpolation_bindings.cpp | 36 ++++++++++++++++++++ src/interpolation/interpolation_bindings.h | 23 +++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 src/interpolation/interpolation_bindings.cpp create mode 100644 src/interpolation/interpolation_bindings.h diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index 01a20eb..eade738 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -13,7 +13,6 @@ add_library( iconmath-interpolation mo_lib_interpolation_scalar.F90 mo_lib_interpolation_vector.F90 - mo_lib_interpolation_vector.hpp mo_lib_interpolation_vector.cpp mo_lib_intp_rbf.F90 mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp new file mode 100644 index 0000000..daa1c44 --- /dev/null +++ b/src/interpolation/interpolation_bindings.cpp @@ -0,0 +1,36 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include "interpolation_bindings.h" +#include "mo_lib_interpolation_vector.hpp" + +void edges2cells_vector_lib_c( + const wp* p_vn_in, const wp* p_vt_in, + const int* cell_edge_idx, const 
int* cell_edge_blk, + const wp* e_bln_c_u, const wp* e_bln_c_v, + wp* p_u_out, wp* p_v_out, + int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, + int slev, int elev, + int nproma, + int nlev, int nblks_e, int nblks_c) +{ + edges2cells_vector_lib<wp>(p_vn_in, p_vt_in, + cell_edge_idx, cell_edge_blk, + e_bln_c_u, e_bln_c_v, + p_u_out, p_v_out, + i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, + nproma, + nlev, nblks_e, nblks_c); +} + diff --git a/src/interpolation/interpolation_bindings.h b/src/interpolation/interpolation_bindings.h new file mode 100644 index 0000000..a712f53 --- /dev/null +++ b/src/interpolation/interpolation_bindings.h @@ -0,0 +1,23 @@ +#pragma once + +#ifdef __SINGLE_PRECISION + using wp = single; +#else + using wp = double; +#endif + + +extern "C"{ + + void edges2cells_vector_lib_c( + const wp* p_vn_in, const wp* p_vt_in, + const int* cell_edge_idx, const int* cell_edge_blk, + const wp* e_bln_c_u, const wp* e_bln_c_v, + wp* p_u_out, wp* p_v_out, + int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, + int slev, int elev, + int nproma, + int nlev, int nblks_e, int nblks_c); + +} \ No newline at end of file -- GitLab From d9ba3f7897888f8495f87439a78d766a056ce01f Mon Sep 17 00:00:00 2001 From: Harshada Balasubramanian <harshada.balasubramanian@mpimet.mpg.de> Date: Mon, 24 Feb 2025 12:46:55 +0100 Subject: [PATCH 03/36] add license header --- src/interpolation/interpolation_bindings.h | 10 ++++++++++ src/interpolation/mo_lib_interpolation_vector.cpp | 13 +++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/interpolation/interpolation_bindings.h b/src/interpolation/interpolation_bindings.h index a712f53..5134365 100644 --- a/src/interpolation/interpolation_bindings.h +++ b/src/interpolation/interpolation_bindings.h @@ -1,3 +1,13 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, 
MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- #pragma once #ifdef __SINGLE_PRECISION diff --git a/src/interpolation/mo_lib_interpolation_vector.cpp b/src/interpolation/mo_lib_interpolation_vector.cpp index 40ef003..74c2be7 100644 --- a/src/interpolation/mo_lib_interpolation_vector.cpp +++ b/src/interpolation/mo_lib_interpolation_vector.cpp @@ -1,6 +1,15 @@ -#include "mo_lib_interpolation_vector.hpp" - +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- +#include "mo_lib_interpolation_vector.hpp" template <typename T> void edges2cells_vector_lib( -- GitLab From 14b309458936d393dc93eb55f10cf9d47c0ea772 Mon Sep 17 00:00:00 2001 From: Harshada Balasubramanian <harshada.balasubramanian@mpimet.mpg.de> Date: Tue, 25 Feb 2025 10:53:17 +0100 Subject: [PATCH 04/36] verts2edges_scalar_lib --- src/interpolation/interpolation_bindings.cpp | 79 +++++++++++++++++-- src/interpolation/interpolation_bindings.h | 55 +++++++++---- .../mo_lib_interpolation_scalar.cpp | 62 +++++++++++++++ .../mo_lib_interpolation_scalar.hpp | 10 +++ 4 files changed, 182 insertions(+), 24 deletions(-) create mode 100644 src/interpolation/mo_lib_interpolation_scalar.cpp create mode 100644 src/interpolation/mo_lib_interpolation_scalar.hpp diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp index daa1c44..c2f0c5e 100644 --- a/src/interpolation/interpolation_bindings.cpp +++ b/src/interpolation/interpolation_bindings.cpp @@ 
-11,19 +11,21 @@ #include "interpolation_bindings.h" #include "mo_lib_interpolation_vector.hpp" - -void edges2cells_vector_lib_c( - const wp* p_vn_in, const wp* p_vt_in, +#include "mo_lib_interpolation_scalar.hpp" + +//mo_lib_interpolation_vector.F90 +void edges2cells_vector_lib_dp( + const double* p_vn_in, const double* p_vt_in, const int* cell_edge_idx, const int* cell_edge_blk, - const wp* e_bln_c_u, const wp* e_bln_c_v, - wp* p_u_out, wp* p_v_out, + const double* e_bln_c_u, const double* e_bln_c_v, + double* p_u_out, double* p_v_out, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, - int nlev, int nblks_e, int nblks_c) -{ - edges2cells_vector_lib<wp>(p_vn_in, p_vt_in, + int nlev, int nblks_e, int nblks_c){ + + edges2cells_vector_lib<double>(p_vn_in, p_vt_in, cell_edge_idx, cell_edge_blk, e_bln_c_u, e_bln_c_v, p_u_out, p_v_out, @@ -34,3 +36,64 @@ void edges2cells_vector_lib_c( nlev, nblks_e, nblks_c); } +void edges2cells_vector_lib_sp( + const float* p_vn_in, const float* p_vt_in, + const int* cell_edge_idx, const int* cell_edge_blk, + const float* e_bln_c_u, const float* e_bln_c_v, + float* p_u_out, float* p_v_out, + int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, + int slev, int elev, + int nproma, + int nlev, int nblks_e, int nblks_c){ + + edges2cells_vector_lib<float>(p_vn_in, p_vt_in, + cell_edge_idx, cell_edge_blk, + e_bln_c_u, e_bln_c_v, + p_u_out, p_v_out, + i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, + nproma, + nlev, nblks_e, nblks_c); +} + +//mo_lib_interpolation_scalar.F90 +void verts2edges_scalar_lib_dp( + const double* p_vertex_in, const int* shape_p_vertex_in, + const int* edge_vertex_idx, const int* edge_vertex_blk, const int* shape_edge_vertex, + const double* coeff_int, const int* shape_coeff_int, + double* p_edge_out, const int* shape_p_edge_out, + const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, 
const int elev, + const int nproma, const bool lacc){ + + verts2edges_scalar_lib<double>(p_vertex_in, shape_p_vertex_in, + edge_vertex_idx, edge_vertex_blk, shape_edge_vertex, + coeff_int, shape_coeff_int, + p_edge_out, shape_p_edge_out, + i_startblk, i_endblk, + i_startidx_in, i_endidx_in, slev, elev, + nproma, lacc); + +} + +void verts2edges_scalar_lib_sp( + const float* p_vertex_in, const int* shape_p_vertex_in, + const int* edge_vertex_idx, const int* edge_vertex_blk, const int* shape_edge_vertex, + const float* coeff_int, const int* shape_coeff_int, + float* p_edge_out, const int* shape_p_edge_out, + const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, + const int nproma, const bool lacc){ + + verts2edges_scalar_lib<float>(p_vertex_in, shape_p_vertex_in, + edge_vertex_idx, edge_vertex_blk, shape_edge_vertex, + coeff_int, shape_coeff_int, + p_edge_out, shape_p_edge_out, + i_startblk, i_endblk, + i_startidx_in, i_endidx_in, slev, elev, + nproma, lacc); + +} + diff --git a/src/interpolation/interpolation_bindings.h b/src/interpolation/interpolation_bindings.h index 5134365..dceba97 100644 --- a/src/interpolation/interpolation_bindings.h +++ b/src/interpolation/interpolation_bindings.h @@ -10,24 +10,47 @@ // --------------------------------------------------------------- #pragma once -#ifdef __SINGLE_PRECISION - using wp = single; -#else - using wp = double; -#endif +extern "C"{ + //mo_lib_interpolation_vector.F90 + void edges2cells_vector_lib_dp( + const double* p_vn_in, const double* p_vt_in, + const int* cell_edge_idx, const int* cell_edge_blk, + const double* e_bln_c_u, const double* e_bln_c_v, + double* p_u_out, double* p_v_out, + int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, + int slev, int elev, + int nproma, + int nlev, int nblks_e, int nblks_c); -extern "C"{ + void edges2cells_vector_lib_sp( + const float* p_vn_in, const float* p_vt_in, + const int* cell_edge_idx, 
const int* cell_edge_blk, + const float* e_bln_c_u, const float* e_bln_c_v, + float* p_u_out, float* p_v_out, + int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, + int slev, int elev, + int nproma, + int nlev, int nblks_e, int nblks_c); - void edges2cells_vector_lib_c( - const wp* p_vn_in, const wp* p_vt_in, - const int* cell_edge_idx, const int* cell_edge_blk, - const wp* e_bln_c_u, const wp* e_bln_c_v, - wp* p_u_out, wp* p_v_out, - int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, - int slev, int elev, - int nproma, - int nlev, int nblks_e, int nblks_c); + //mo_lib_interpolation_scalar.F90 + void verts2edges_scalar_lib_dp( + const double* p_vertex_in, const int* shape_p_vertex_in, + const int* edge_vertex_idx, const int* edge_vertex_blk, const int* shape_edge_vertex, + const double* coeff_int, const int* shape_coeff_int, + double* p_edge_out, const int* shape_p_edge_out, + const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, + const int nproma, const bool lacc); + void verts2edges_scalar_lib_sp( + const float* p_vertex_in, const int* shape_p_vertex_in, + const int* edge_vertex_idx, const int* edge_vertex_blk, const int* shape_edge_vertex, + const float* coeff_int, const int* shape_coeff_int, + float* p_edge_out, const int* shape_p_edge_out, + const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, + const int nproma, const bool lacc); } \ No newline at end of file diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp new file mode 100644 index 0000000..ac9e4f0 --- /dev/null +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -0,0 +1,62 @@ +#include <Kokkos_Core.hpp> +#include <vector> +#include "mo_lib_loopindices.hpp" +#include "mo_lib_interpolation_scalar.hpp" + +template <typename T> +void verts2edges_scalar_lib(const T* 
p_vertex_in, const int* shape_p_vertex_in, + const int* edge_vertex_idx, const int* edge_vertex_blk, const int* shape_edge_vertex, + const T* coeff_int, const int* shape_coeff_int, + T* p_edge_out, const int* shape_p_edge_out, + const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, + const int nproma, const bool lacc){ + + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + + UnmanagedConstT3D p_vertex_in_view (p_vertex_in, shape_p_vertex_in[0], shape_p_vertex_in[1], shape_p_vertex_in[2]); + UnmanagedConstInt3D iidx_view (edge_vertex_idx, shape_edge_vertex[0], shape_edge_vertex[1], shape_edge_vertex[2]); + UnmanagedConstInt3D iblk_view (edge_vertex_blk, shape_edge_vertex[0], shape_edge_vertex[1], shape_edge_vertex[2]); + UnmanagedConstT3D coeff_int_view (coeff_int, shape_coeff_int[0], shape_coeff_int[1], shape_coeff_int[2]); + UnmanagedT3D p_edge_out_view (p_edge_out, shape_p_edge_out[0], shape_p_edge_out[1], shape_p_edge_out[2]); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, + jb, i_startblk, i_endblk, + i_startidx, i_endidx); + + Kokkos::parallel_for("verts2edges_scalar", Kokkos::MDRangePolicy<Kokkos::Rank<2>> + ({slev, i_startidx}, {elev, i_endidx}), + KOKKOS_LAMBDA(const int jk, const int je) { + + p_edge_out_view(je, jk, jb) = coeff_int_view(je, 0, jb) * + p_vertex_in_view(iidx_view(je, jb, 0), jk, iblk_view(je, jb, 0)) + + coeff_int_view(je, 1, jb)*p_vertex_in_view(iidx_view(je, jb, 1), jk, iblk_view(je, jb, 1)); + + }); + Kokkos::fence(); + } +} + +template +void verts2edges_scalar_lib<double>(const double* 
p_vertex_in, const int* shape_p_vertex_in, + const int* edge_vertex_idx, const int* edge_vertex_blk, const int* shape_edge_vertex, + const double* coeff_int, const int* shape_coeff_int, + double* p_edge_out, const int* shape_p_edge_out, + const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, + const int nproma, const bool lacc); + +template +void verts2edges_scalar_lib<float>(const float* p_vertex_in, const int* shape_p_vertex_in, + const int* edge_vertex_idx, const int* edge_vertex_blk, const int* shape_edge_vertex, + const float* coeff_int, const int* shape_coeff_int, + float* p_edge_out, const int* shape_p_edge_out, + const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, + const int nproma, const bool lacc); \ No newline at end of file diff --git a/src/interpolation/mo_lib_interpolation_scalar.hpp b/src/interpolation/mo_lib_interpolation_scalar.hpp new file mode 100644 index 0000000..5178308 --- /dev/null +++ b/src/interpolation/mo_lib_interpolation_scalar.hpp @@ -0,0 +1,10 @@ +#pragma once + +template <typename T> +void verts2edges_scalar_lib(const T* p_vertex_in, const int* shape_p_vertex_in, + const int* edge_vertex_idx, const int* edge_vertex_blk, const int* shape_edge_vertex, + const T* coeff_int, const int* shape_coeff_int, + T* p_edge_out, const int* shape_p_edge_out, + const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, + const int nproma, const bool lacc); \ No newline at end of file -- GitLab From a5760248c767ce77c8e39b427f801e4e7cb4f1c2 Mon Sep 17 00:00:00 2001 From: Harshada Balasubramanian <harshada.balasubramanian@mpimet.mpg.de> Date: Tue, 25 Feb 2025 12:31:16 +0100 Subject: [PATCH 05/36] cells2edges_scalar_lib --- src/interpolation/interpolation_bindings.cpp | 42 +++++++ src/interpolation/interpolation_bindings.h | 24 ++++ 
.../mo_lib_interpolation_scalar.cpp | 105 +++++++++++++++++- .../mo_lib_interpolation_scalar.hpp | 10 +- 4 files changed, 179 insertions(+), 2 deletions(-) diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp index c2f0c5e..61e9c89 100644 --- a/src/interpolation/interpolation_bindings.cpp +++ b/src/interpolation/interpolation_bindings.cpp @@ -97,3 +97,45 @@ void verts2edges_scalar_lib_sp( } +void cells2edges_scalar_lib_dp(const double* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const double* coeff_int, double* p_edge_out, const int* i_startblk_in, + const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc){ + + cells2edges_scalar_lib<double, double>(p_cell_in,edge_cell_idx,edge_cell_blk,coeff_int,p_edge_out,i_startblk_in, + i_endblk_in,i_startidx_in,i_endidx_in,slev,elev,nproma,nlev, + nblk_c,nblks_e,patch_id,l_limited_area,lfill_latbc,lacc); + +} + + +void cells2edges_scalar_lib_sp(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const float* coeff_int, float* p_edge_out, const int* i_startblk_in, + const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc){ + + cells2edges_scalar_lib<float, float>(p_cell_in,edge_cell_idx,edge_cell_blk,coeff_int,p_edge_out,i_startblk_in, + i_endblk_in,i_startidx_in,i_endidx_in,slev,elev,nproma,nlev, + nblk_c,nblks_e,patch_id,l_limited_area,lfill_latbc,lacc); + +} + +void cells2edges_scalar_lib_sp2dp(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const double* coeff_int, double* 
p_edge_out, const int* i_startblk_in, + const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc){ + + cells2edges_scalar_lib<double, float>(p_cell_in,edge_cell_idx,edge_cell_blk,coeff_int,p_edge_out,i_startblk_in, + i_endblk_in,i_startidx_in,i_endidx_in,slev,elev,nproma,nlev, + nblk_c,nblks_e,patch_id,l_limited_area,lfill_latbc,lacc); + +} + + + diff --git a/src/interpolation/interpolation_bindings.h b/src/interpolation/interpolation_bindings.h index dceba97..d2411af 100644 --- a/src/interpolation/interpolation_bindings.h +++ b/src/interpolation/interpolation_bindings.h @@ -53,4 +53,28 @@ extern "C"{ const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const bool lacc); + + + void cells2edges_scalar_lib_dp(const double* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const double* coeff_int, double* p_edge_out, const int* i_startblk_in, + const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc); + + + void cells2edges_scalar_lib_sp(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const float* coeff_int, float* p_edge_out, const int* i_startblk_in, + const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc); + + void cells2edges_scalar_lib_sp2dp(const float* p_cell_in, const int* edge_cell_idx, const int* 
edge_cell_blk, + const double* coeff_int, double* p_edge_out, const int* i_startblk_in, + const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc); + } \ No newline at end of file diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index ac9e4f0..03822b6 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -1,5 +1,6 @@ #include <Kokkos_Core.hpp> #include <vector> +#include <iostream> #include "mo_lib_loopindices.hpp" #include "mo_lib_interpolation_scalar.hpp" @@ -43,6 +44,82 @@ void verts2edges_scalar_lib(const T* p_vertex_in, const int* shape_p_vertex_in, } } +template <typename T, typename S> +void cells2edges_scalar_lib(const S* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const T* coeff_int, T* p_edge_out, const int* i_startblk_in, + const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc){ + + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<const S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstS3D; + typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + + UnmanagedConstS3D p_cell_in_view (p_cell_in, nproma, nlev, nblk_c); + UnmanagedConstInt3D iidx_view (edge_cell_idx, nproma, nblks_e, 2); + UnmanagedConstInt3D iblk_view (edge_cell_blk, nproma, nblks_e, 2); + UnmanagedConstT3D coeff_int_view (coeff_int, nproma, 2, nblks_e); + UnmanagedT3D p_edge_out_view (p_edge_out, nproma, nlev, nblks_e); + + //Fill outermost nest boundary + int i_startblk, i_endblk; + if ((l_limited_area || patch_id > 0) && (lfill_latbc)){ + i_startblk = i_startblk_in[0]; + i_endblk = i_endblk_in[0]; + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_e_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::parallel_for("cells2edges_scalar", Kokkos::MDRangePolicy<Kokkos::Rank<2>> + ({slev, i_startidx}, {elev, i_endidx}), + KOKKOS_LAMBDA(const int jk, const int je) { + + if (iidx_view(je, jb, 0) >= 0 && iblk_view(je, jb, 0) >= 0){ + p_edge_out_view(je, jk, jb) = p_cell_in_view(iidx_view(je, jb, 0), jk, iblk_view(je, jb, 0)); + } + else if (iidx_view(je, jb, 1) >= 0 && iblk_view(je, jb, 1) >= 0){ + p_edge_out_view(je, jk, jb) = p_cell_in_view(iidx_view(je, jb, 1), jk, iblk_view(je, jb, 1)); + } + else{ + std::cerr << "mo_interpolation:cells2edges_scalar_lib: error in lateral boundary filling" << std::endl; + std::exit(EXIT_FAILURE); + } + }); + Kokkos::fence(); + } + } + else{ + //Process the remaining grid points for which a real interpolation is possible + i_startblk = i_startblk_in[1]; + i_endblk = i_endblk_in[1]; + + for (int jb = i_startblk; jb < i_endblk; 
++jb) { + + int i_startidx, i_endidx; + get_indices_e_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::parallel_for("cells2edges_scalar", Kokkos::MDRangePolicy<Kokkos::Rank<2>> + ({slev, i_startidx}, {elev, i_endidx}), + KOKKOS_LAMBDA(const int jk, const int je) { + + p_edge_out_view(je, jk, jb) = coeff_int_view(je, 0, jb) * + p_cell_in_view(iidx_view(je, jb, 0), jk, iblk_view(je, jb, 0)) + + coeff_int_view(je, 1, jb) * p_cell_in_view(iidx_view(je, jb, 1), jk, iblk_view(je, jb, 1)); + + }); + Kokkos::fence(); + } + } + +} + template void verts2edges_scalar_lib<double>(const double* p_vertex_in, const int* shape_p_vertex_in, const int* edge_vertex_idx, const int* edge_vertex_blk, const int* shape_edge_vertex, @@ -59,4 +136,30 @@ void verts2edges_scalar_lib<float>(const float* p_vertex_in, const int* shape_p_ float* p_edge_out, const int* shape_p_edge_out, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, - const int nproma, const bool lacc); \ No newline at end of file + const int nproma, const bool lacc); + +template +void cells2edges_scalar_lib<double, double>(const double* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const double* coeff_int, double* p_edge_out, const int* i_startblk_in, + const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc); + +template +void cells2edges_scalar_lib<float, float>(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const float* coeff_int, float* p_edge_out, const int* i_startblk_in, + const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, 
const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc); + +template +void cells2edges_scalar_lib<double, float>(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const double* coeff_int, double* p_edge_out, const int* i_startblk_in, + const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc); + + diff --git a/src/interpolation/mo_lib_interpolation_scalar.hpp b/src/interpolation/mo_lib_interpolation_scalar.hpp index 5178308..b6a2509 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.hpp +++ b/src/interpolation/mo_lib_interpolation_scalar.hpp @@ -7,4 +7,12 @@ void verts2edges_scalar_lib(const T* p_vertex_in, const int* shape_p_vertex_in, T* p_edge_out, const int* shape_p_edge_out, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, - const int nproma, const bool lacc); \ No newline at end of file + const int nproma, const bool lacc); + +template <typename T, typename S> +void cells2edges_scalar_lib(const S* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const T* coeff_int, T* p_edge_out, const int* i_startblk_in, + const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc); -- GitLab From cc01fe7c6c42c7102f23907b97a6f940719991ac Mon Sep 17 00:00:00 2001 From: Harshada Balasubramanian <harshada.balasubramanian@mpimet.mpg.de> Date: Tue, 25 Feb 2025 15:26:59 +0100 Subject: [PATCH 06/36] edges2verts_scalar_lib --- src/interpolation/interpolation_bindings.cpp | 22 ++++++++ 
src/interpolation/interpolation_bindings.h | 14 +++++ .../mo_lib_interpolation_scalar.cpp | 56 +++++++++++++++++++ .../mo_lib_interpolation_scalar.hpp | 7 +++ 4 files changed, 99 insertions(+) diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp index 61e9c89..509be83 100644 --- a/src/interpolation/interpolation_bindings.cpp +++ b/src/interpolation/interpolation_bindings.cpp @@ -137,5 +137,27 @@ void cells2edges_scalar_lib_sp2dp(const float* p_cell_in, const int* edge_cell_i } +void edges2verts_scalar_lib_dp (const double* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, + const double* v_int, double* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, const int nblks_e, + const int nblks_v, const int cell_type, const bool lacc){ + + edges2verts_scalar_lib<double>(p_edge_in, vert_edge_idx, vert_edge_blk, v_int,p_vert_out,i_startblk, + i_endblk,i_startidx_in,i_endidx_in, slev, elev, nproma, nlev,nblks_e, nblks_v, cell_type,lacc); +} + +void edges2verts_scalar_lib_sp(const float* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, + const float* v_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, const int nblks_e, + const int nblks_v, const int cell_type, const bool lacc){ + + edges2verts_scalar_lib<float>(p_edge_in, vert_edge_idx, vert_edge_blk, v_int,p_vert_out,i_startblk, + i_endblk,i_startidx_in,i_endidx_in, slev, elev, nproma, nlev,nblks_e, nblks_v, cell_type,lacc); + +} + + diff --git a/src/interpolation/interpolation_bindings.h b/src/interpolation/interpolation_bindings.h index d2411af..c630ae8 100644 --- a/src/interpolation/interpolation_bindings.h +++ b/src/interpolation/interpolation_bindings.h @@ -77,4 +77,18 @@ extern "C"{ const 
int nblk_c, const int nblks_e, const int patch_id, const bool l_limited_area, const bool lfill_latbc, const bool lacc); + void edges2verts_scalar_lib_dp (const double* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, + const double* v_int, double* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, const int nblks_e, + const int nblks_v, const int cell_type, const bool lacc); + + + void edges2verts_scalar_lib_sp (const float* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, + const float* v_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, const int nblks_e, + const int nblks_v, const int cell_type, const bool lacc); + + } \ No newline at end of file diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index 03822b6..34d3336 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -120,6 +120,47 @@ void cells2edges_scalar_lib(const S* p_cell_in, const int* edge_cell_idx, const } +template <typename T> +void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, + const T* v_int, T* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, const int nblks_e, + const int nblks_v, const int cell_type, const bool lacc){ + + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + + UnmanagedConstT3D p_edge_in_view (p_edge_in, nproma, nlev, nblks_e); + UnmanagedConstInt3D iidx_view (vert_edge_idx, nproma, nblks_v, 5); + UnmanagedConstInt3D iblk_view (vert_edge_blk, nproma,nblks_v, 5); + UnmanagedConstT3D v_int_view (v_int, nproma,cell_type,nblks_v); + UnmanagedT3D p_vert_out_view (p_vert_out, nproma,nlev,nblks_v); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, + jb, i_startblk, i_endblk, + i_startidx, i_endidx); + + Kokkos::parallel_for("edges2verts_scalar", Kokkos::MDRangePolicy<Kokkos::Rank<2>> + ({slev, i_startidx}, {elev, i_endidx}), + KOKKOS_LAMBDA(const int jk, const int jv) { + + p_vert_out_view(jv, jk, jb) = v_int_view(jv, 0, jb)*p_edge_in_view(iidx_view(jv, jb, 0), jk, iblk_view(jv, jb, 0)) + + v_int_view(jv, 1, jb)*p_edge_in_view(iidx_view(jv, jb, 1), jk, iblk_view(jv, jb, 1)) + + v_int_view(jv, 2, jb)*p_edge_in_view(iidx_view(jv, jb, 2), jk, iblk_view(jv, jb, 2)) + + v_int_view(jv, 3, jb)*p_edge_in_view(iidx_view(jv, jb, 3), jk, iblk_view(jv, jb, 3)) + + v_int_view(jv, 4, jb)*p_edge_in_view(iidx_view(jv, jb, 4), jk, iblk_view(jv, jb, 4)) + + v_int_view(jv, 5, jb)*p_edge_in_view(iidx_view(jv, jb, 5), jk, iblk_view(jv, jb, 5)); + }); + Kokkos::fence(); + } + +} + template void verts2edges_scalar_lib<double>(const double* p_vertex_in, const int* shape_p_vertex_in, const int* edge_vertex_idx, const int* edge_vertex_blk, const int* shape_edge_vertex, @@ -163,3 +204,18 @@ void cells2edges_scalar_lib<double, float>(const float* p_cell_in, const int* ed const bool l_limited_area, const bool lfill_latbc, const bool lacc); +template +void 
edges2verts_scalar_lib<double> (const double* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, + const double* v_int, double* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, const int nblks_e, + const int nblks_v, const int cell_type, const bool lacc); + +template +void edges2verts_scalar_lib <float> (const float* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, + const float* v_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, const int nblks_e, + const int nblks_v, const int cell_type, const bool lacc); + + diff --git a/src/interpolation/mo_lib_interpolation_scalar.hpp b/src/interpolation/mo_lib_interpolation_scalar.hpp index b6a2509..6145089 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.hpp +++ b/src/interpolation/mo_lib_interpolation_scalar.hpp @@ -16,3 +16,10 @@ void cells2edges_scalar_lib(const S* p_cell_in, const int* edge_cell_idx, const const int slev, const int elev, const int nproma, const int nlev, const int nblk_c, const int nblks_e, const int patch_id, const bool l_limited_area, const bool lfill_latbc, const bool lacc); + +template <typename T> +void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, + const T* v_int, T* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, const int nblks_e, + const int nblks_v, const int cell_type, const bool lacc); -- GitLab From 350143de7d8cce4b14eaf7b259f688766af04005 Mon Sep 17 00:00:00 2001 From: Harshada Balasubramanian <harshada.balasubramanian@mpimet.mpg.de> Date: Tue, 25 Feb 2025 15:37:34 +0100 Subject: [PATCH 07/36] remove shape --- 
src/interpolation/interpolation_bindings.cpp | 40 +++++++++---------- src/interpolation/interpolation_bindings.h | 33 +++++++-------- .../mo_lib_interpolation_scalar.cpp | 40 +++++++++---------- .../mo_lib_interpolation_scalar.hpp | 15 +++---- 4 files changed, 61 insertions(+), 67 deletions(-) diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp index 509be83..2589edf 100644 --- a/src/interpolation/interpolation_bindings.cpp +++ b/src/interpolation/interpolation_bindings.cpp @@ -59,41 +59,37 @@ void edges2cells_vector_lib_sp( } //mo_lib_interpolation_scalar.F90 -void verts2edges_scalar_lib_dp( - const double* p_vertex_in, const int* shape_p_vertex_in, - const int* edge_vertex_idx, const int* edge_vertex_blk, const int* shape_edge_vertex, - const double* coeff_int, const int* shape_coeff_int, - double* p_edge_out, const int* shape_p_edge_out, +void verts2edges_scalar_lib_dp(const double* p_vertex_in, + const int* edge_vertex_idx, const int* edge_vertex_blk, + const double* coeff_int, + double* p_edge_out, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, - const int nproma, const bool lacc){ + const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc){ - verts2edges_scalar_lib<double>(p_vertex_in, shape_p_vertex_in, - edge_vertex_idx, edge_vertex_blk, shape_edge_vertex, - coeff_int, shape_coeff_int, - p_edge_out, shape_p_edge_out, + verts2edges_scalar_lib<double>(p_vertex_in, + edge_vertex_idx, edge_vertex_blk, coeff_int, + p_edge_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, - nproma, lacc); + nproma, nlev, nblks_v, nblks_e, lacc); } -void verts2edges_scalar_lib_sp( - const float* p_vertex_in, const int* shape_p_vertex_in, - const int* edge_vertex_idx, const int* edge_vertex_blk, const int* shape_edge_vertex, - const float* coeff_int, const int* shape_coeff_int, - float* p_edge_out, const int* 
shape_p_edge_out, +void verts2edges_scalar_lib_sp(const float* p_vertex_in, + const int* edge_vertex_idx, const int* edge_vertex_blk, + const float* coeff_int, + float* p_edge_out, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, - const int nproma, const bool lacc){ + const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc){ - verts2edges_scalar_lib<float>(p_vertex_in, shape_p_vertex_in, - edge_vertex_idx, edge_vertex_blk, shape_edge_vertex, - coeff_int, shape_coeff_int, - p_edge_out, shape_p_edge_out, + verts2edges_scalar_lib<float>(p_vertex_in, + edge_vertex_idx, edge_vertex_blk, coeff_int, + p_edge_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, - nproma, lacc); + nproma, nlev, nblks_v, nblks_e, lacc); } diff --git a/src/interpolation/interpolation_bindings.h b/src/interpolation/interpolation_bindings.h index c630ae8..8567ef9 100644 --- a/src/interpolation/interpolation_bindings.h +++ b/src/interpolation/interpolation_bindings.h @@ -36,25 +36,22 @@ extern "C"{ int nlev, int nblks_e, int nblks_c); //mo_lib_interpolation_scalar.F90 - void verts2edges_scalar_lib_dp( - const double* p_vertex_in, const int* shape_p_vertex_in, - const int* edge_vertex_idx, const int* edge_vertex_blk, const int* shape_edge_vertex, - const double* coeff_int, const int* shape_coeff_int, - double* p_edge_out, const int* shape_p_edge_out, - const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, - const int nproma, const bool lacc); + void verts2edges_scalar_lib_dp(const double* p_vertex_in, + const int* edge_vertex_idx, const int* edge_vertex_blk, + const double* coeff_int, + double* p_edge_out, + const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_v, const int nblks_e, const 
bool lacc); - void verts2edges_scalar_lib_sp( - const float* p_vertex_in, const int* shape_p_vertex_in, - const int* edge_vertex_idx, const int* edge_vertex_blk, const int* shape_edge_vertex, - const float* coeff_int, const int* shape_coeff_int, - float* p_edge_out, const int* shape_p_edge_out, - const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, - const int nproma, const bool lacc); - - + void verts2edges_scalar_lib_sp(const float* p_vertex_in, + const int* edge_vertex_idx, const int* edge_vertex_blk, + const float* coeff_int, + float* p_edge_out, + const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc); + void cells2edges_scalar_lib_dp(const double* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, const double* coeff_int, double* p_edge_out, const int* i_startblk_in, const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index 34d3336..0994ac1 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -5,24 +5,24 @@ #include "mo_lib_interpolation_scalar.hpp" template <typename T> -void verts2edges_scalar_lib(const T* p_vertex_in, const int* shape_p_vertex_in, - const int* edge_vertex_idx, const int* edge_vertex_blk, const int* shape_edge_vertex, - const T* coeff_int, const int* shape_coeff_int, - T* p_edge_out, const int* shape_p_edge_out, +void verts2edges_scalar_lib(const T* p_vertex_in, + const int* edge_vertex_idx, const int* edge_vertex_blk, + const T* coeff_int, + T* p_edge_out, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, - const int nproma, 
const bool lacc){ + const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc){ // Wrap raw pointers in unmanaged Kokkos Views. typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - UnmanagedConstT3D p_vertex_in_view (p_vertex_in, shape_p_vertex_in[0], shape_p_vertex_in[1], shape_p_vertex_in[2]); - UnmanagedConstInt3D iidx_view (edge_vertex_idx, shape_edge_vertex[0], shape_edge_vertex[1], shape_edge_vertex[2]); - UnmanagedConstInt3D iblk_view (edge_vertex_blk, shape_edge_vertex[0], shape_edge_vertex[1], shape_edge_vertex[2]); - UnmanagedConstT3D coeff_int_view (coeff_int, shape_coeff_int[0], shape_coeff_int[1], shape_coeff_int[2]); - UnmanagedT3D p_edge_out_view (p_edge_out, shape_p_edge_out[0], shape_p_edge_out[1], shape_p_edge_out[2]); + UnmanagedConstT3D p_vertex_in_view (p_vertex_in, nproma,nlev,nblks_v); + UnmanagedConstInt3D iidx_view (edge_vertex_idx,nproma,nblks_e, 4); + UnmanagedConstInt3D iblk_view (edge_vertex_blk, nproma,nblks_e, 4); + UnmanagedConstT3D coeff_int_view (coeff_int, nproma,2,nblks_e); + UnmanagedT3D p_edge_out_view (p_edge_out, nproma,nlev,nblks_e); for (int jb = i_startblk; jb < i_endblk; ++jb) { @@ -162,22 +162,22 @@ void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const } template -void verts2edges_scalar_lib<double>(const double* p_vertex_in, const int* shape_p_vertex_in, - const int* edge_vertex_idx, const int* edge_vertex_blk, const int* shape_edge_vertex, - const double* coeff_int, const int* shape_coeff_int, - double* p_edge_out, const int* shape_p_edge_out, +void verts2edges_scalar_lib<double>(const double* p_vertex_in, + const int* edge_vertex_idx, const int* edge_vertex_blk, + const double* coeff_int, + double* p_edge_out, const int i_startblk, 
const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, - const int nproma, const bool lacc); + const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc); template -void verts2edges_scalar_lib<float>(const float* p_vertex_in, const int* shape_p_vertex_in, - const int* edge_vertex_idx, const int* edge_vertex_blk, const int* shape_edge_vertex, - const float* coeff_int, const int* shape_coeff_int, - float* p_edge_out, const int* shape_p_edge_out, +void verts2edges_scalar_lib<float>(const float* p_vertex_in, + const int* edge_vertex_idx, const int* edge_vertex_blk, + const float* coeff_int, + float* p_edge_out, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, - const int nproma, const bool lacc); + const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc); template void cells2edges_scalar_lib<double, double>(const double* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, diff --git a/src/interpolation/mo_lib_interpolation_scalar.hpp b/src/interpolation/mo_lib_interpolation_scalar.hpp index 6145089..1c578b0 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.hpp +++ b/src/interpolation/mo_lib_interpolation_scalar.hpp @@ -1,13 +1,14 @@ #pragma once template <typename T> -void verts2edges_scalar_lib(const T* p_vertex_in, const int* shape_p_vertex_in, - const int* edge_vertex_idx, const int* edge_vertex_blk, const int* shape_edge_vertex, - const T* coeff_int, const int* shape_coeff_int, - T* p_edge_out, const int* shape_p_edge_out, - const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, - const int nproma, const bool lacc); +void verts2edges_scalar_lib(const T* p_vertex_in, + const int* edge_vertex_idx, const int* edge_vertex_blk, + const T* coeff_int, + T* p_edge_out, + const int i_startblk, const int 
i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc); +; template <typename T, typename S> void cells2edges_scalar_lib(const S* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, -- GitLab From a51b9a2b201d5932ee3dc5376324b30735cb0e51 Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: Tue, 25 Feb 2025 15:51:51 +0100 Subject: [PATCH 08/36] Partial mo_lib_interpolation_scalar from Dylan --- src/interpolation/CMakeLists.txt | 2 + .../mo_lib_interpolation_scalar_Dylan.cpp | 408 ++++++++++++++++++ .../mo_lib_interpolation_scalar_Dylan.hpp | 54 +++ ...ib_interpolation_scalar_bindings_Dylan.cpp | 219 ++++++++++ ..._lib_interpolation_scalar_bindings_Dylan.h | 109 +++++ 5 files changed, 792 insertions(+) create mode 100644 src/interpolation/mo_lib_interpolation_scalar_Dylan.cpp create mode 100644 src/interpolation/mo_lib_interpolation_scalar_Dylan.hpp create mode 100644 src/interpolation/mo_lib_interpolation_scalar_bindings_Dylan.cpp create mode 100644 src/interpolation/mo_lib_interpolation_scalar_bindings_Dylan.h diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index eade738..3e8ef1a 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -12,6 +12,8 @@ add_library( iconmath-interpolation mo_lib_interpolation_scalar.F90 + mo_lib_interpolation_scalar_Dylan.cpp + mo_lib_interpolation_scalar_bindings_Dylan.cpp mo_lib_interpolation_vector.F90 mo_lib_interpolation_vector.cpp mo_lib_intp_rbf.F90 diff --git a/src/interpolation/mo_lib_interpolation_scalar_Dylan.cpp b/src/interpolation/mo_lib_interpolation_scalar_Dylan.cpp new file mode 100644 index 0000000..8b133c5 --- /dev/null +++ b/src/interpolation/mo_lib_interpolation_scalar_Dylan.cpp @@ -0,0 +1,408 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 
2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include <Kokkos_Core.hpp> +#include "mo_lib_loopindices.hpp" +#include "mo_lib_interpolation_scalar_Dylan.hpp" + +//------------------------------------------------------------------------ +// +//> +/// Computes interpolation from edges to cells +/// +/// Computes interpolation of scalar fields from velocity points to +/// cell centers via given interpolation weights +/// +template <typename T> +void edges2cells_scalar_lib( + const T* p_edge_in, const int* edge_idx, const int* edge_blk, + const T* coeff_int, T* p_cell_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_e, const int nblks_c, + const bool lacc) +{ + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + + // edge based scalar input field, dim: (nproma,nlev,nblks_e) + UnmanagedConstT3D p_edge_in_view(p_edge_in, nproma, nlev, nblks_e); + + // line indices of edges of triangles, dim: (nproma,nblks_c, 3) + UnmanagedConstInt3D iidx_view(edge_idx, nproma, nblks_c, 3); // edge_idx_view + + // block indices of edges of triangles, dim: (nproma,nblks_c, 3) + UnmanagedConstInt3D iblk_view(edge_blk, nproma, nblks_c, 3); // edge_blk_view + + // coefficients for (area weighted) interpolation, dim: (nproma,3-cell_type,nblks_c) + UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 3, nblks_c); + + // cell based scalar output field, dim: (nproma,nlev,nblks_c) + UnmanagedT3D p_cell_out_view(p_cell_out, nproma, nlev, nblks_c); + + int i_startidx, i_endidx; + + for (int jb = i_startblk; jb < i_endblk; ++jb){ + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, + i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for("edges2cells_scalar_lib_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + p_cell_out_view(jc, jk, jb) = + coeff_int_view(jc, 1, jb)*p_edge_in_view(iidx_view(jc, jb, 1), jk, iblk_view(jc, jb, 1)) + + coeff_int_view(jc, 2, jb)*p_edge_in_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2)) + + coeff_int_view(jc, 3, jb)*p_edge_in_view(iidx_view(jc, jb, 3), jk, iblk_view(jc, jb, 3)); + }); + } + +} + +template +void edges2cells_scalar_lib<double>( + const double* p_edge_in, const int* edge_idx, const int* edge_blk, + const double* coeff_int, double* p_cell_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int 
i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_e, const int nblks_c, + const bool lacc); + +template +void edges2cells_scalar_lib<float>( + const float* p_edge_in, const int* edge_idx, const int* edge_blk, + const float* coeff_int, float* p_cell_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_e, const int nblks_c, + const bool lacc); + +//------------------------------------------------------------------------ +//> +//// Computes average of scalar fields from centers of cells to vertices. +//// +template <typename T, typename S> +void cells2verts_scalar_lib( + const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const S* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async) +{ + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<const S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstS3D; + typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + + // cell based scalar input field, dim: (nproma,nlev,nblks_c) + UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c); + + // line indices of cells around each vertex, dim: (nproma,nblks_v, 6) + UnmanagedConstInt3D iidx_view(vert_cell_idx, nproma, nblks_v, 6); // vert_cell_idx_view + + // block indices of cells around each vertex, dim: (nproma,nblks_v, 6) + UnmanagedConstInt3D iblk_view(vert_cell_blk, nproma, nblks_v, 6); // vert_cell_blk_view + + // coefficients for interpolation, dim: (nproma,9-cell_type,nblks_v) + UnmanagedConstS3D coeff_int_view(coeff_int, nproma, 9, nblks_v); + + // vertex based scalar output field, dim: (nproma,nlev,nblks_c) + UnmanagedS3D p_vert_out_view(p_vert_out, nproma, nlev, nblks_c); + + int i_startidx, i_endidx; + + for (int jb = i_startblk; jb < i_endblk; ++jb){ + + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, + i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for("cells2verts_scalar_lib", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jv) { + + p_vert_out_view(jv, jk, jb) = + coeff_int_view(jv, 0, jb)*p_cell_in_view(iidx_view(jv, jb, 0), jk, iblk_view(jv, jb, 0)) + + coeff_int_view(jv, 1, jb)*p_cell_in_view(iidx_view(jv, jb, 1), jk, iblk_view(jv, jb, 1)) + + coeff_int_view(jv, 2, jb)*p_cell_in_view(iidx_view(jv, jb, 2), jk, iblk_view(jv, jb, 2)) + + coeff_int_view(jv, 3, jb)*p_cell_in_view(iidx_view(jv, jb, 3), jk, iblk_view(jv, jb, 3)) + + coeff_int_view(jv, 4, jb)*p_cell_in_view(iidx_view(jv, 
jb, 4), jk, iblk_view(jv, jb, 4)) + + coeff_int_view(jv, 5, jb)*p_cell_in_view(iidx_view(jv, jb, 5), jk, iblk_view(jv, jb, 5)); + + }); + } +} + +template +void cells2verts_scalar_lib<double,double>( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + +template +void cells2verts_scalar_lib<double,float>( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + +template +void cells2verts_scalar_lib<float,float>( + const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + +//------------------------------------------------------------------------ + + +//> +/// Same as above, but provides output optionally in single precision and +/// assumes reversed index order of the output field in loop exchange mode +/// +template <typename T, typename S> +void cells2verts_scalar_ri_lib( + const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const T* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const 
int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async) +{ + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + + // cell based scalar input field, dim: (nproma,nlev,nblks_c) + UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c); + + // line indices of cells around each vertex, dim: (nproma,nblks_v, 6) + UnmanagedConstInt3D iidx_view(vert_cell_idx, nproma, nblks_v, 6); // vert_cell_idx_view + + // block indices of cells around each vertex, dim: (nproma,nblks_v, 6) + UnmanagedConstInt3D iblk_view(vert_cell_blk, nproma, nblks_v, 6); // vert_cell_blk_view + + // coefficients for interpolation, dim: (nproma,9-cell_type,nblks_v) + UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 9, nblks_v); + + // vertex based scalar output field, dim: (nproma,nlev,nblks_c) + UnmanagedS3D p_vert_out_view(p_vert_out, nproma, nlev, nblks_c); + + int i_startidx, i_endidx; + + for (int jb = i_startblk; jb < i_endblk; ++jb){ + + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, + i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for("cells2verts_scalar_ri_lib", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jv) { + + p_vert_out_view(jv, jk, jb) = + coeff_int_view(jv, 0, jb)*p_cell_in_view(iidx_view(jv, jb, 0), jk, iblk_view(jv, jb, 0)) + + coeff_int_view(jv, 1, jb)*p_cell_in_view(iidx_view(jv, jb, 1), jk, iblk_view(jv, jb, 1)) + + coeff_int_view(jv, 2, jb)*p_cell_in_view(iidx_view(jv, jb, 2), jk, iblk_view(jv, jb, 2)) + + coeff_int_view(jv, 3, jb)*p_cell_in_view(iidx_view(jv, jb, 3), jk, iblk_view(jv, jb, 3)) + + coeff_int_view(jv, 4, 
jb)*p_cell_in_view(iidx_view(jv, jb, 4), jk, iblk_view(jv, jb, 4)) + + coeff_int_view(jv, 5, jb)*p_cell_in_view(iidx_view(jv, jb, 5), jk, iblk_view(jv, jb, 5)); + + }); + } + +} + +template +void cells2verts_scalar_ri_lib<double,double>( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + +template +void cells2verts_scalar_ri_lib<double,float>( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const double* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + +template +void cells2verts_scalar_ri_lib<float,float>( + const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + + +//> +/// Computes average of scalar fields from vertices to centers of cells. +/// +template <typename T> +void verts2cells_scalar_lib( + const T* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const T* coeff_int, T* p_cell_out, const int nblks_c, const int npromz_c, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_v, const bool lacc) +{ + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + + // cell based scalar input field, dim: (nproma,nlev,nblks_v) + UnmanagedConstT3D p_vert_in_view(p_vert_in, nproma, nlev, nblks_v); + + // line indices of vertices of triangles, dim: (nproma,nblks_c, 3) + UnmanagedConstInt3D iidx_view(cell_index_idx, nproma, nblks_c, 3); // cell_vertex_idx + + // block indices of vertices of triangles, dim: (nproma,nblks_c, 3) + UnmanagedConstInt3D iblk_view(cell_vertex_blk, nproma, nblks_c, 3); // cell_vertex_blk + + // coefficients for interpolation, dim: (nproma,3-cell_type,nblks_c) + UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 3, nblks_c); + + // vertex based scalar output field, dim: (nproma,nlev,nblks_c) + UnmanagedT3D p_cell_out_view(p_cell_out, nproma, nlev, nblks_c); + + for (int jb = 1; jb<nblks_c; ++jb){ + + int nlen = (jb != nblks_c ? 
nproma: npromz_c); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, 0}, {elev + 1, nlen + 1}); + + Kokkos::parallel_for("cell_avg_lib_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + + p_cell_out_view(jc, jk, jb) = + coeff_int_view(jc, 0, jb)*p_vert_in_view(iidx_view(jc, jb, 0), jk, iblk_view(jc, jb, 0)) + + coeff_int_view(jc, 1, jb)*p_vert_in_view(iidx_view(jc, jb, 1), jk, iblk_view(jc, jb, 1)) + + coeff_int_view(jc, 2, jb)*p_vert_in_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2)); + + }); + } +} + +template +void verts2cells_scalar_lib<double>( + const double* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const double* coeff_int, double* p_cell_out, const int nblks_c, const int npromz_c, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_v, const bool lacc); + +template +void verts2cells_scalar_lib<float>( + const float* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const float* coeff_int, float* p_cell_out, const int nblks_c, const int npromz_c, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_v, const bool lacc); + + +//------------------------------------------------------------------------- +// +// +//> +/// Computes the average of a cell-based variable. +/// +/// Computes the average of a cell-based variable +/// over its original location and the neighboring triangles. 
+/// Version with variable weighting coefficients, computed such that +/// linear horizontal gradients are not aliased into a checkerboard noise +/// input: lives on centers of triangles +/// output: lives on centers of triangles +/// + +template <typename T> +void cell_avg_lib( + const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const T* avg_coeff, T* avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const bool lacc) +{ + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + + // cell based variable before averaging, dim: (nproma,nlev,nblks_c) + UnmanagedConstT3D psi_c_view(psi_c, nproma, nlev, nblks_c); + // line indices of triangles next to each cell, dim: (nproma,nblks_c, 3) + UnmanagedConstInt3D iidx_view(cell_neighbor_idx, nproma, nblks_c, 3); // cell_neighbour_idx + // block indices of triangles next to each cell, dim: (nproma,nblks_c, 3) + UnmanagedConstInt3D iblk_view(cell_neighbor_blk, nproma, nblks_c, 3); // cell_neighbour_blk + // averaging coefficients, dim: (nproma,nlev,nblks_c) + UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma, nlev, nblks_c); + + // cell based variable after averaging, dim: (nproma,nlev,nblks_c) + UnmanagedT3D avg_psi_c_view(avg_psi_c, nproma, nlev, nblks_c); + + int i_startidx, i_endidx; + + for (int jb = i_startblk; jb<i_endblk; ++jb){ + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, + i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + 
Kokkos::parallel_for("cell_avg_lib_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + // calculate the weighted average + + avg_psi_c_view(jc, jk, jb) = + psi_c_view(jc, jk, jb)*avg_coeff_view(jc, 0, jb) + + psi_c_view(iidx_view(jc, jb, 0), jk, iblk_view(jc, jb, 0))*avg_coeff_view(jc, 1, jb) + + psi_c_view(iidx_view(jc, jb, 1), jk, iblk_view(jc, jb, 1))*avg_coeff_view(jc, 2, jb) + + psi_c_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2))*avg_coeff_view(jc, 3, jb); + }); + } +} + +template +void cell_avg_lib<double>( + const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const double* avg_coeff, double* avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, + const bool lacc); + +template +void cell_avg_lib<float>( + const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const float* avg_coeff, float* avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, + const bool lacc); \ No newline at end of file diff --git a/src/interpolation/mo_lib_interpolation_scalar_Dylan.hpp b/src/interpolation/mo_lib_interpolation_scalar_Dylan.hpp new file mode 100644 index 0000000..887dd4a --- /dev/null +++ b/src/interpolation/mo_lib_interpolation_scalar_Dylan.hpp @@ -0,0 +1,54 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#pragma once + +template <typename T> +void edges2cells_scalar_lib( + const T* 
p_edge_in, const int* edge_idx, const int* edge_blk, + const T* coeff_int, T* p_cell_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_e, const int nblks_c, + const bool lacc); + +template <typename T, typename S> +void cells2verts_scalar_lib( + const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const S* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + +template <typename T, typename S> +void cells2verts_scalar_ri_lib( + const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const T* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + +template <typename T> +void verts2cells_scalar_lib( + const T* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const T* coeff_int, T* p_cell_out, const int nblks_c, const int npromz_c, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_v, const bool lacc); + +template <typename T> +void cell_avg_lib( + const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const T* avg_coeff, T* avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const bool lacc); \ No newline at end of file diff --git a/src/interpolation/mo_lib_interpolation_scalar_bindings_Dylan.cpp b/src/interpolation/mo_lib_interpolation_scalar_bindings_Dylan.cpp 
new file mode 100644 index 0000000..dc3b509 --- /dev/null +++ b/src/interpolation/mo_lib_interpolation_scalar_bindings_Dylan.cpp @@ -0,0 +1,219 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include "mo_lib_interpolation_scalar_bindings_Dylan.h" +#include "mo_lib_interpolation_scalar_Dylan.hpp" + +void edges2cells_scalar_lib_dp( + const double* p_edge_in, const int* edge_idx, const int* edge_blk, + const double* coeff_int, double* p_cell_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_e, const int nblks_c, + const bool lacc) +{ + edges2cells_scalar_lib<double>( + p_edge_in, edge_idx, edge_blk, + coeff_int, p_cell_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, nproma, + nlev, nblks_e, nblks_c, + lacc); +} + +void edges2cells_scalar_lib_sp( + const float* p_edge_in, const int* edge_idx, const int* edge_blk, + const float* coeff_int, float* p_cell_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_e, const int nblks_c, + const bool lacc) +{ + + edges2cells_scalar_lib<float>( + p_edge_in, edge_idx, edge_blk, + coeff_int, p_cell_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, nproma, + nlev, nblks_e, nblks_c, + lacc); +} + +///////////////////////////////////////////// + +void cells2verts_scalar_lib_dp( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const double* coeff_int, double* 
p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async) +{ + cells2verts_scalar_lib<double,double>( + p_cell_in, vert_cell_idx, vert_cell_blk, + coeff_int, p_vert_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, nproma, + nlev, nblks_c, nblks_v, + lacc, acc_async); +} + +void cells2verts_scalar_lib_dp2sp( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async) +{ + cells2verts_scalar_lib<double,float>( + p_cell_in, vert_cell_idx, vert_cell_blk, + coeff_int, p_vert_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, nproma, + nlev, nblks_c, nblks_v, + lacc, acc_async); +} + +void cells2verts_scalar_lib_sp( + const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async) +{ + cells2verts_scalar_lib<float,float>( + p_cell_in, vert_cell_idx, vert_cell_blk, + coeff_int, p_vert_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, nproma, + nlev, nblks_c, nblks_v, + lacc, acc_async); +} + +///////////////////////////////////////////// + +void cells2verts_scalar_ri_lib_dp( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const double* coeff_int, double* p_vert_out, const int i_startblk, const 
int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async) +{ + cells2verts_scalar_ri_lib<double,double>( + p_cell_in, vert_cell_idx, vert_cell_blk, + coeff_int, p_vert_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, nproma, + nlev, nblks_c, nblks_v, + lacc, acc_async); +} + +void cells2verts_scalar_ri_lib_dp2sp( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const double* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async) +{ + cells2verts_scalar_ri_lib<double,float>( + p_cell_in, vert_cell_idx, vert_cell_blk, + coeff_int, p_vert_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, nproma, + nlev, nblks_c, nblks_v, + lacc, acc_async); +} + +void cells2verts_scalar_ri_lib_sp( + const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async) +{ + cells2verts_scalar_ri_lib<float,float>( + p_cell_in, vert_cell_idx, vert_cell_blk, + coeff_int, p_vert_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, nproma, + nlev, nblks_c, nblks_v, + lacc, acc_async); +} + +///////////////////////////////////////////// + +void verts2cells_scalar_lib_dp( + const double* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const double* coeff_int, double* p_cell_out, const int nblks_c, const int npromz_c, + const int 
slev, const int elev, const int nproma, const int nlev, + const int nblks_v, const bool lacc) +{ + verts2cells_scalar_lib<double>( + p_vert_in, cell_index_idx, cell_vertex_blk, + coeff_int, p_cell_out, nblks_c, npromz_c, + slev, elev, nproma, nlev, + nblks_v, lacc); +} + +void verts2cells_scalar_lib_sp( + const float* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const float* coeff_int, float* p_cell_out, const int nblks_c, const int npromz_c, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_v, const bool lacc) +{ + verts2cells_scalar_lib<float>( + p_vert_in, cell_index_idx, cell_vertex_blk, + coeff_int, p_cell_out, nblks_c, npromz_c, + slev, elev, nproma, nlev, + nblks_v, lacc); +} + +///////////////////////////////////////////// + +void cell_avg_lib_dp( + const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const double* avg_coeff, double* avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, + const bool lacc) +{ + cell_avg_lib<double>( + psi_c, cell_neighbor_idx, cell_neighbor_blk, + avg_coeff, avg_psi_c, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, nproma, + nlev, nblks_c, + lacc); +} + +void cell_avg_lib_sp( + const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const float* avg_coeff, float* avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, + const bool lacc) +{ + cell_avg_lib<float>( + psi_c, cell_neighbor_idx, cell_neighbor_blk, + avg_coeff, avg_psi_c, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, nproma, + nlev, nblks_c, + lacc); +} + \ No newline at end of file diff --git 
a/src/interpolation/mo_lib_interpolation_scalar_bindings_Dylan.h b/src/interpolation/mo_lib_interpolation_scalar_bindings_Dylan.h new file mode 100644 index 0000000..4cb399b --- /dev/null +++ b/src/interpolation/mo_lib_interpolation_scalar_bindings_Dylan.h @@ -0,0 +1,109 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#pragma once + +void edges2cells_scalar_lib_dp( + const double* p_edge_in, const int* edge_idx, const int* edge_blk, + const double* coeff_int, double* p_cell_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_e, const int nblks_c, + const bool lacc); +void edges2cells_scalar_lib_sp( + const float* p_edge_in, const int* edge_idx, const int* edge_blk, + const float* coeff_int, float* p_cell_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_e, const int nblks_c, + const bool lacc); + +///////////////////////////////////////////// + +void cells2verts_scalar_lib_dp( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); +void cells2verts_scalar_lib_dp2sp( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const float* 
coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); +void cells2verts_scalar_lib_sp( + const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + +///////////////////////////////////////////// + +void cells2verts_scalar_ri_lib_dp( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + +void cells2verts_scalar_ri_lib_dp2sp( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const double* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + +void cells2verts_scalar_ri_lib_sp( + const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + +///////////////////////////////////////////// + +void 
verts2cells_scalar_lib_dp( + const double* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const double* coeff_int, double* p_cell_out, const int nblks_c, const int npromz_c, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_v, const bool lacc); + +void verts2cells_scalar_lib_sp( + const float* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const float* coeff_int, float* p_cell_out, const int nblks_c, const int npromz_c, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_v, const bool lacc); + +///////////////////////////////////////////// + +void cell_avg_lib_dp( + const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const double* avg_coeff, double* avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, + const bool lacc); +void cell_avg_lib_sp( + const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const float* avg_coeff, float* avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, + const bool lacc); + \ No newline at end of file -- GitLab From fc76537f1b2ed49bfa32cb806f6d2b9b0de70dcf Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: Tue, 25 Feb 2025 15:59:46 +0100 Subject: [PATCH 09/36] adding license headers --- src/interpolation/mo_lib_interpolation_scalar.cpp | 12 +++++++++++- src/interpolation/mo_lib_interpolation_scalar.hpp | 11 +++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index 0994ac1..9a5f206 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ 
b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -1,5 +1,15 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + #include <Kokkos_Core.hpp> -#include <vector> #include <iostream> #include "mo_lib_loopindices.hpp" #include "mo_lib_interpolation_scalar.hpp" diff --git a/src/interpolation/mo_lib_interpolation_scalar.hpp b/src/interpolation/mo_lib_interpolation_scalar.hpp index 1c578b0..ca52349 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.hpp +++ b/src/interpolation/mo_lib_interpolation_scalar.hpp @@ -1,3 +1,14 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + #pragma once template <typename T> -- GitLab From 00aa94ae6a88437922f65ebef65ce4b1681168e0 Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: Tue, 25 Feb 2025 16:07:25 +0100 Subject: [PATCH 10/36] Merging Dylan and Harshada's parts of mo_lib_interpolation_scalar --- src/interpolation/CMakeLists.txt | 2 - src/interpolation/interpolation_bindings.cpp | 202 +++++++++ src/interpolation/interpolation_bindings.h | 95 ++++ .../mo_lib_interpolation_scalar.cpp | 393 +++++++++++++++++ .../mo_lib_interpolation_scalar.hpp | 42 ++ .../mo_lib_interpolation_scalar_Dylan.cpp | 408 ------------------ .../mo_lib_interpolation_scalar_Dylan.hpp | 54 --- ...ib_interpolation_scalar_bindings_Dylan.cpp | 219 ---------- 
..._lib_interpolation_scalar_bindings_Dylan.h | 109 ----- 9 files changed, 732 insertions(+), 792 deletions(-) delete mode 100644 src/interpolation/mo_lib_interpolation_scalar_Dylan.cpp delete mode 100644 src/interpolation/mo_lib_interpolation_scalar_Dylan.hpp delete mode 100644 src/interpolation/mo_lib_interpolation_scalar_bindings_Dylan.cpp delete mode 100644 src/interpolation/mo_lib_interpolation_scalar_bindings_Dylan.h diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index 3e8ef1a..eade738 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -12,8 +12,6 @@ add_library( iconmath-interpolation mo_lib_interpolation_scalar.F90 - mo_lib_interpolation_scalar_Dylan.cpp - mo_lib_interpolation_scalar_bindings_Dylan.cpp mo_lib_interpolation_vector.F90 mo_lib_interpolation_vector.cpp mo_lib_intp_rbf.F90 diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp index 2589edf..4476035 100644 --- a/src/interpolation/interpolation_bindings.cpp +++ b/src/interpolation/interpolation_bindings.cpp @@ -154,6 +154,208 @@ void edges2verts_scalar_lib_sp(const float* p_edge_in, const int* vert_edge_idx, } +void edges2cells_scalar_lib_dp( + const double* p_edge_in, const int* edge_idx, const int* edge_blk, + const double* coeff_int, double* p_cell_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_e, const int nblks_c, + const bool lacc) +{ + edges2cells_scalar_lib<double>( + p_edge_in, edge_idx, edge_blk, + coeff_int, p_cell_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, nproma, + nlev, nblks_e, nblks_c, + lacc); +} + +void edges2cells_scalar_lib_sp( + const float* p_edge_in, const int* edge_idx, const int* edge_blk, + const float* coeff_int, float* p_cell_out, const int i_startblk, const int i_endblk, + const int 
i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_e, const int nblks_c, + const bool lacc) +{ + + edges2cells_scalar_lib<float>( + p_edge_in, edge_idx, edge_blk, + coeff_int, p_cell_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, nproma, + nlev, nblks_e, nblks_c, + lacc); +} + +///////////////////////////////////////////// + +void cells2verts_scalar_lib_dp( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async) +{ + cells2verts_scalar_lib<double,double>( + p_cell_in, vert_cell_idx, vert_cell_blk, + coeff_int, p_vert_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, nproma, + nlev, nblks_c, nblks_v, + lacc, acc_async); +} + +void cells2verts_scalar_lib_dp2sp( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async) +{ + cells2verts_scalar_lib<double,float>( + p_cell_in, vert_cell_idx, vert_cell_blk, + coeff_int, p_vert_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, nproma, + nlev, nblks_c, nblks_v, + lacc, acc_async); +} + +void cells2verts_scalar_lib_sp( + const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + 
const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async) +{ + cells2verts_scalar_lib<float,float>( + p_cell_in, vert_cell_idx, vert_cell_blk, + coeff_int, p_vert_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, nproma, + nlev, nblks_c, nblks_v, + lacc, acc_async); +} + +///////////////////////////////////////////// +void cells2verts_scalar_ri_lib_dp( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async) +{ + cells2verts_scalar_ri_lib<double,double>( + p_cell_in, vert_cell_idx, vert_cell_blk, + coeff_int, p_vert_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, nproma, + nlev, nblks_c, nblks_v, + lacc, acc_async); +} + +void cells2verts_scalar_ri_lib_dp2sp( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const double* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async) +{ + cells2verts_scalar_ri_lib<double,float>( + p_cell_in, vert_cell_idx, vert_cell_blk, + coeff_int, p_vert_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, nproma, + nlev, nblks_c, nblks_v, + lacc, acc_async); +} + +void cells2verts_scalar_ri_lib_sp( + const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int 
nblks_c, const int nblks_v, + const bool lacc, const bool acc_async) +{ + cells2verts_scalar_ri_lib<float,float>( + p_cell_in, vert_cell_idx, vert_cell_blk, + coeff_int, p_vert_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, nproma, + nlev, nblks_c, nblks_v, + lacc, acc_async); +} + +///////////////////////////////////////////// +void verts2cells_scalar_lib_dp( + const double* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const double* coeff_int, double* p_cell_out, const int nblks_c, const int npromz_c, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_v, const bool lacc) +{ + verts2cells_scalar_lib<double>( + p_vert_in, cell_index_idx, cell_vertex_blk, + coeff_int, p_cell_out, nblks_c, npromz_c, + slev, elev, nproma, nlev, + nblks_v, lacc); +} + +void verts2cells_scalar_lib_sp( + const float* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const float* coeff_int, float* p_cell_out, const int nblks_c, const int npromz_c, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_v, const bool lacc) +{ + verts2cells_scalar_lib<float>( + p_vert_in, cell_index_idx, cell_vertex_blk, + coeff_int, p_cell_out, nblks_c, npromz_c, + slev, elev, nproma, nlev, + nblks_v, lacc); +} +///////////////////////////////////////////// + +void cell_avg_lib_dp( + const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const double* avg_coeff, double* avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, + const bool lacc) +{ + cell_avg_lib<double>( + psi_c, cell_neighbor_idx, cell_neighbor_blk, + avg_coeff, avg_psi_c, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, nproma, + nlev, nblks_c, + lacc); +} + +void cell_avg_lib_sp( + const float* psi_c, const int* cell_neighbor_idx, 
const int* cell_neighbor_blk, + const float* avg_coeff, float* avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, + const bool lacc) +{ + cell_avg_lib<float>( + psi_c, cell_neighbor_idx, cell_neighbor_blk, + avg_coeff, avg_psi_c, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, nproma, + nlev, nblks_c, + lacc); +} + diff --git a/src/interpolation/interpolation_bindings.h b/src/interpolation/interpolation_bindings.h index 8567ef9..123c487 100644 --- a/src/interpolation/interpolation_bindings.h +++ b/src/interpolation/interpolation_bindings.h @@ -87,5 +87,100 @@ extern "C"{ const int elev, const int nproma, const int nlev, const int nblks_e, const int nblks_v, const int cell_type, const bool lacc); + void edges2cells_scalar_lib_dp( + const double* p_edge_in, const int* edge_idx, const int* edge_blk, + const double* coeff_int, double* p_cell_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_e, const int nblks_c, + const bool lacc); + void edges2cells_scalar_lib_sp( + const float* p_edge_in, const int* edge_idx, const int* edge_blk, + const float* coeff_int, float* p_cell_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_e, const int nblks_c, + const bool lacc); + + ///////////////////////////////////////////// + + void cells2verts_scalar_lib_dp( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const 
int nblks_v, + const bool lacc, const bool acc_async); + void cells2verts_scalar_lib_dp2sp( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + void cells2verts_scalar_lib_sp( + const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + + ///////////////////////////////////////////// + + void cells2verts_scalar_ri_lib_dp( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + + void cells2verts_scalar_ri_lib_dp2sp( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const double* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + + void cells2verts_scalar_ri_lib_sp( + const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, 
const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + + ///////////////////////////////////////////// + + void verts2cells_scalar_lib_dp( + const double* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const double* coeff_int, double* p_cell_out, const int nblks_c, const int npromz_c, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_v, const bool lacc); + + void verts2cells_scalar_lib_sp( + const float* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const float* coeff_int, float* p_cell_out, const int nblks_c, const int npromz_c, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_v, const bool lacc); + + ///////////////////////////////////////////// + + void cell_avg_lib_dp( + const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const double* avg_coeff, double* avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, + const bool lacc); + void cell_avg_lib_sp( + const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const float* avg_coeff, float* avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, + const bool lacc); } \ No newline at end of file diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index 9a5f206..7f91246 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -229,3 +229,396 @@ void edges2verts_scalar_lib <float> (const float* p_edge_in, const int* vert_edg const int nblks_v, const int cell_type, const bool 
lacc); +//------------------------------------------------------------------------ +// +//> +/// Computes interpolation from edges to cells +/// +/// Computes interpolation of scalar fields from velocity points to +/// cell centers via given interpolation weights +/// +template <typename T> +void edges2cells_scalar_lib( + const T* p_edge_in, const int* edge_idx, const int* edge_blk, + const T* coeff_int, T* p_cell_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_e, const int nblks_c, + const bool lacc) +{ + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + + // edge based scalar input field, dim: (nproma,nlev,nblks_e) + UnmanagedConstT3D p_edge_in_view(p_edge_in, nproma, nlev, nblks_e); + + // line indices of edges of triangles, dim: (nproma,nblks_c, 3) + UnmanagedConstInt3D iidx_view(edge_idx, nproma, nblks_c, 3); // edge_idx_view + + // block indices of edges of triangles, dim: (nproma,nblks_c, 3) + UnmanagedConstInt3D iblk_view(edge_blk, nproma, nblks_c, 3); // edge_blk_view + + // coefficients for (area weighted) interpolation, dim: (nproma,3-cell_type,nblks_c) + UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 3, nblks_c); + + // cell based scalar output field, dim: (nproma,nlev,nblks_c) + UnmanagedT3D p_cell_out_view(p_cell_out, nproma, nlev, nblks_c); + + int i_startidx, i_endidx; + + for (int jb = i_startblk; jb < i_endblk; ++jb){ + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, + i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); 
+
+    Kokkos::parallel_for("edges2cells_scalar_lib_inner", innerPolicy,
+      KOKKOS_LAMBDA(const int jk, const int jc) {
+        p_cell_out_view(jc, jk, jb) =  // weighted sum over the three edges of cell jc; slot indices are 0-based (extent 3 -> 0..2)
+          coeff_int_view(jc, 0, jb)*p_edge_in_view(iidx_view(jc, jb, 0), jk, iblk_view(jc, jb, 0)) +
+          coeff_int_view(jc, 1, jb)*p_edge_in_view(iidx_view(jc, jb, 1), jk, iblk_view(jc, jb, 1)) +
+          coeff_int_view(jc, 2, jb)*p_edge_in_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2));
+      });  // NOTE(review): values read from iidx/iblk are used as-is as 0-based view indices -- confirm callers shift Fortran indices
+  }
+
+}
+
+template  // explicit instantiation: double precision
+void edges2cells_scalar_lib<double>(
+    const double* p_edge_in, const int* edge_idx, const int* edge_blk,
+    const double* coeff_int, double* p_cell_out, const int i_startblk, const int i_endblk,
+    const int i_startidx_in, const int i_endidx_in,
+    const int slev, const int elev, const int nproma,
+    const int nlev, const int nblks_e, const int nblks_c,
+    const bool lacc);
+
+template  // explicit instantiation: single precision
+void edges2cells_scalar_lib<float>(
+    const float* p_edge_in, const int* edge_idx, const int* edge_blk,
+    const float* coeff_int, float* p_cell_out, const int i_startblk, const int i_endblk,
+    const int i_startidx_in, const int i_endidx_in,
+    const int slev, const int elev, const int nproma,
+    const int nlev, const int nblks_e, const int nblks_c,
+    const bool lacc);
+
+//------------------------------------------------------------------------
+//>
+//// Computes average of scalar fields from centers of cells to vertices.
+////
+template <typename T, typename S>
+void cells2verts_scalar_lib(
+    const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk,
+    const S* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk,
+    const int i_startidx_in, const int i_endidx_in,
+    const int slev, const int elev, const int nproma,
+    const int nlev, const int nblks_c, const int nblks_v,
+    const bool lacc, const bool acc_async)
+{
+  // Wrap raw pointers in unmanaged Kokkos Views.
+  typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
+  typedef Kokkos::View<const S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstS3D;
+  typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D;
+  typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
+
+  // cell based scalar input field, dim: (nproma,nlev,nblks_c)
+  UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c);
+
+  // line indices of cells around each vertex, dim: (nproma,nblks_v, 6)
+  UnmanagedConstInt3D iidx_view(vert_cell_idx, nproma, nblks_v, 6); // vert_cell_idx_view
+
+  // block indices of cells around each vertex, dim: (nproma,nblks_v, 6)
+  UnmanagedConstInt3D iblk_view(vert_cell_blk, nproma, nblks_v, 6); // vert_cell_blk_view
+
+  // coefficients for interpolation, dim: (nproma,6,nblks_v), i.e. 9-cell_type with cell_type=3; slots 0..5 are read below
+  UnmanagedConstS3D coeff_int_view(coeff_int, nproma, 6, nblks_v); // LayoutLeft derives the block stride from this extent -- assumes a 6-slot array, TODO confirm with caller
+
+  // vertex based scalar output field, dim: (nproma,nlev,nblks_v)
+  UnmanagedS3D p_vert_out_view(p_vert_out, nproma, nlev, nblks_v);
+
+  int i_startidx, i_endidx;
+
+  for (int jb = i_startblk; jb < i_endblk; ++jb){  // NOTE(review): block i_endblk itself is not visited -- confirm the end-block convention
+
+    get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk,
+                      i_startidx, i_endidx);
+
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
+      {slev, i_startidx}, {elev + 1, i_endidx + 1});  // upper bounds are exclusive, so elev and i_endidx are still visited
+
+    Kokkos::parallel_for("cells2verts_scalar_lib", innerPolicy,
+      KOKKOS_LAMBDA(const int jk, const int jv) {
+
+        p_vert_out_view(jv, jk, jb) =  // weighted sum over the six cell slots around vertex jv
+          coeff_int_view(jv, 0, jb)*p_cell_in_view(iidx_view(jv, jb, 0), jk, iblk_view(jv, jb, 0)) +
+          coeff_int_view(jv, 1, jb)*p_cell_in_view(iidx_view(jv, jb, 1), jk, iblk_view(jv, jb, 1)) +
+          coeff_int_view(jv, 2, jb)*p_cell_in_view(iidx_view(jv, jb, 2), jk, iblk_view(jv, jb, 2)) +
+          coeff_int_view(jv, 3, jb)*p_cell_in_view(iidx_view(jv, jb, 3), jk, iblk_view(jv, jb, 3)) +
+          coeff_int_view(jv, 4, jb)*p_cell_in_view(iidx_view(jv, jb, 4), jk, iblk_view(jv, jb, 4)) +
+          coeff_int_view(jv, 5, jb)*p_cell_in_view(iidx_view(jv, jb, 5), jk, iblk_view(jv, jb, 5));
+
+      });
+  }
+}
+
+template  // explicit instantiation: double input, double coefficients/output
+void cells2verts_scalar_lib<double,double>(
+    const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk,
+    const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk,
+    const int i_startidx_in, const int i_endidx_in,
+    const int slev, const int elev, const int nproma,
+    const int nlev, const int nblks_c, const int nblks_v,
+    const bool lacc, const bool acc_async);
+
+template  // explicit instantiation: double input, float coefficients/output
+void cells2verts_scalar_lib<double,float>(
+    const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk,
+    const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk,
+    const int i_startidx_in, const int i_endidx_in,
+    const int slev, const int elev, const int nproma,
+    const int nlev, const int nblks_c, const int nblks_v,
+    const bool lacc, const bool acc_async);
+
+template  // explicit instantiation: float input, float coefficients/output
+void cells2verts_scalar_lib<float,float>(
+    const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk,
+    const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk,
+    const int i_startidx_in, const int i_endidx_in,
+    const int slev, const int elev, const int nproma,
+    const int nlev, const int nblks_c, const int nblks_v,
+    const bool lacc, const bool acc_async);
+
+//------------------------------------------------------------------------
+
+
+//>
+/// Same as above, but provides output optionally in single precision and
+/// assumes reversed index order of the output field in loop exchange mode
+///
+template <typename T, typename S>
+void cells2verts_scalar_ri_lib(
+    const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk,
+    const T* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk,
+    const int i_startidx_in, const int i_endidx_in,
+    const int slev, const int elev, const int nproma,
+    const int nlev, const
int nblks_c, const int nblks_v,
+    const bool lacc, const bool acc_async)
+{
+  // Wrap raw pointers in unmanaged Kokkos Views.
+  typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
+  typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D;
+  typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
+
+  // cell based scalar input field, dim: (nproma,nlev,nblks_c)
+  UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c);
+
+  // line indices of cells around each vertex, dim: (nproma,nblks_v, 6)
+  UnmanagedConstInt3D iidx_view(vert_cell_idx, nproma, nblks_v, 6); // vert_cell_idx_view
+
+  // block indices of cells around each vertex, dim: (nproma,nblks_v, 6)
+  UnmanagedConstInt3D iblk_view(vert_cell_blk, nproma, nblks_v, 6); // vert_cell_blk_view
+
+  // coefficients for interpolation, dim: (nproma,6,nblks_v), i.e. 9-cell_type with cell_type=3; slots 0..5 are read below
+  UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 6, nblks_v); // LayoutLeft derives the block stride from this extent -- assumes a 6-slot array, TODO confirm with caller
+
+  // vertex based scalar output field, dim: (nproma,nlev,nblks_v)
+  UnmanagedS3D p_vert_out_view(p_vert_out, nproma, nlev, nblks_v);
+
+  int i_startidx, i_endidx;
+
+  for (int jb = i_startblk; jb < i_endblk; ++jb){  // NOTE(review): block i_endblk itself is not visited -- confirm the end-block convention
+
+    get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk,
+                      i_startidx, i_endidx);
+
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
+      {slev, i_startidx}, {elev + 1, i_endidx + 1});  // upper bounds are exclusive, so elev and i_endidx are still visited
+
+    Kokkos::parallel_for("cells2verts_scalar_ri_lib", innerPolicy,
+      KOKKOS_LAMBDA(const int jk, const int jv) {
+
+        p_vert_out_view(jv, jk, jb) =  // NOTE(review): the function's header comment mentions a reversed output index order; this always writes (jv,jk,jb) -- confirm
+          coeff_int_view(jv, 0, jb)*p_cell_in_view(iidx_view(jv, jb, 0), jk, iblk_view(jv, jb, 0)) +
+          coeff_int_view(jv, 1, jb)*p_cell_in_view(iidx_view(jv, jb, 1), jk, iblk_view(jv, jb, 1)) +
+          coeff_int_view(jv, 2, jb)*p_cell_in_view(iidx_view(jv, jb, 2), jk, iblk_view(jv, jb, 2)) +
+          coeff_int_view(jv, 3, jb)*p_cell_in_view(iidx_view(jv, jb, 3), jk, iblk_view(jv, jb, 3)) +
+          coeff_int_view(jv, 4, jb)*p_cell_in_view(iidx_view(jv, jb, 4), jk, iblk_view(jv, jb, 4)) +
+          coeff_int_view(jv, 5, jb)*p_cell_in_view(iidx_view(jv, jb, 5), jk, iblk_view(jv, jb, 5));
+
+      });
+  }
+
+}
+
+template  // explicit instantiation: double input/coefficients, double output
+void cells2verts_scalar_ri_lib<double,double>(
+    const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk,
+    const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk,
+    const int i_startidx_in, const int i_endidx_in,
+    const int slev, const int elev, const int nproma,
+    const int nlev, const int nblks_c, const int nblks_v,
+    const bool lacc, const bool acc_async);
+
+template  // explicit instantiation: double input/coefficients, float output
+void cells2verts_scalar_ri_lib<double,float>(
+    const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk,
+    const double* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk,
+    const int i_startidx_in, const int i_endidx_in,
+    const int slev, const int elev, const int nproma,
+    const int nlev, const int nblks_c, const int nblks_v,
+    const bool lacc, const bool acc_async);
+
+template  // explicit instantiation: float input/coefficients, float output
+void cells2verts_scalar_ri_lib<float,float>(
+    const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk,
+    const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk,
+    const int i_startidx_in, const int i_endidx_in,
+    const int slev, const int elev, const int nproma,
+    const int nlev, const int nblks_c, const int nblks_v,
+    const bool lacc, const bool acc_async);
+
+
+//>
+/// Computes average of scalar fields from vertices to centers of cells.
+///
+template <typename T>
+void verts2cells_scalar_lib(
+    const T* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk,
+    const T* coeff_int, T* p_cell_out, const int nblks_c, const int npromz_c,
+    const int slev, const int elev, const int nproma, const int nlev,
+    const int nblks_v, const bool lacc)
+{
+  // Wrap raw pointers in unmanaged Kokkos Views.
+  typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
+  typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
+  typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
+
+  // vertex based scalar input field, dim: (nproma,nlev,nblks_v)
+  UnmanagedConstT3D p_vert_in_view(p_vert_in, nproma, nlev, nblks_v);
+
+  // line indices of vertices of triangles, dim: (nproma,nblks_c, 3)
+  UnmanagedConstInt3D iidx_view(cell_index_idx, nproma, nblks_c, 3); // cell_vertex_idx
+
+  // block indices of vertices of triangles, dim: (nproma,nblks_c, 3)
+  UnmanagedConstInt3D iblk_view(cell_vertex_blk, nproma, nblks_c, 3); // cell_vertex_blk
+
+  // coefficients for interpolation, dim: (nproma,3,nblks_c); slots 0..2 are read below
+  UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 3, nblks_c);
+
+  // cell based scalar output field, dim: (nproma,nlev,nblks_c)
+  UnmanagedT3D p_cell_out_view(p_cell_out, nproma, nlev, nblks_c);
+
+  for (int jb = 0; jb < nblks_c; ++jb){  // 0-based block loop: visits every block 0..nblks_c-1 of the views
+
+    int nlen = (jb != nblks_c - 1 ? nproma: npromz_c);  // only the last block (index nblks_c-1) is limited to npromz_c entries
+
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
+      {slev, 0}, {elev + 1, nlen});  // upper bounds are exclusive: jc covers 0..nlen-1 and stays inside the nproma extent
+
+    Kokkos::parallel_for("verts2cells_scalar_lib", innerPolicy,
+      KOKKOS_LAMBDA(const int jk, const int jc) {
+
+        p_cell_out_view(jc, jk, jb) =  // weighted sum over the three vertices of cell jc
+          coeff_int_view(jc, 0, jb)*p_vert_in_view(iidx_view(jc, jb, 0), jk, iblk_view(jc, jb, 0)) +
+          coeff_int_view(jc, 1, jb)*p_vert_in_view(iidx_view(jc, jb, 1), jk, iblk_view(jc, jb, 1)) +
+          coeff_int_view(jc, 2, jb)*p_vert_in_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2));
+
+      });
+  }
+}
+
+template  // explicit instantiation: double precision
+void verts2cells_scalar_lib<double>(
+    const double* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk,
+    const double* coeff_int, double* p_cell_out, const int nblks_c, const int npromz_c,
+    const int slev, const int elev, const int nproma, const int nlev,
+    const int nblks_v, const bool lacc);
+
+template  // explicit instantiation: single precision
+void verts2cells_scalar_lib<float>(
+    const float* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk,
+    const float* coeff_int, float* p_cell_out, const int nblks_c, const int npromz_c,
+    const int slev, const int elev, const int nproma, const int nlev,
+    const int nblks_v, const bool lacc);
+
+
+//-------------------------------------------------------------------------
+//
+//
+//>
+/// Computes the average of a cell-based variable.
+///
+/// Computes the average of a cell-based variable
+/// over its original location and the neighboring triangles.
+/// Version with variable weighting coefficients, computed such that
+/// linear horizontal gradients are not aliased into a checkerboard noise
+/// input: lives on centers of triangles
+/// output: lives on centers of triangles
+///
+
+template <typename T>
+void cell_avg_lib(
+    const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+    const T* avg_coeff, T* avg_psi_c, const int i_startblk, const int i_endblk,
+    const int i_startidx_in, const int i_endidx_in,
+    const int slev, const int elev, const int nproma,
+    const int nlev, const int nblks_c, const bool lacc)
+{
+  // Wrap raw pointers in unmanaged Kokkos Views.
+  typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
+  typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
+  typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
+
+  // cell based variable before averaging, dim: (nproma,nlev,nblks_c)
+  UnmanagedConstT3D psi_c_view(psi_c, nproma, nlev, nblks_c);
+  // line indices of triangles next to each cell, dim: (nproma,nblks_c, 3)
+  UnmanagedConstInt3D iidx_view(cell_neighbor_idx, nproma, nblks_c, 3); // cell_neighbour_idx
+  // block indices of triangles next to each cell, dim: (nproma,nblks_c, 3)
+  UnmanagedConstInt3D iblk_view(cell_neighbor_blk, nproma, nblks_c, 3); // cell_neighbour_blk
+  // averaging coefficients, dim: (nproma,4,nblks_c): slot 0 weights the cell itself, slots 1..3 its neighbours
+  UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma, 4, nblks_c); // the kernel reads slots 0..3; assumes the caller's array has 4 slots (not nlev) -- TODO confirm
+
+  // cell based variable after averaging, dim: (nproma,nlev,nblks_c)
+  UnmanagedT3D avg_psi_c_view(avg_psi_c, nproma, nlev, nblks_c);
+
+  int i_startidx, i_endidx;
+
+  for (int jb = i_startblk; jb<i_endblk; ++jb){  // NOTE(review): block i_endblk itself is not visited -- confirm the end-block convention
+    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk,
+                      i_startidx, i_endidx);
+
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
+      {slev, i_startidx}, {elev + 1, i_endidx + 1});  // upper bounds are exclusive, so elev and i_endidx are still visited
+
+
Kokkos::parallel_for("cell_avg_lib_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + // calculate the weighted average + + avg_psi_c_view(jc, jk, jb) = + psi_c_view(jc, jk, jb)*avg_coeff_view(jc, 0, jb) + + psi_c_view(iidx_view(jc, jb, 0), jk, iblk_view(jc, jb, 0))*avg_coeff_view(jc, 1, jb) + + psi_c_view(iidx_view(jc, jb, 1), jk, iblk_view(jc, jb, 1))*avg_coeff_view(jc, 2, jb) + + psi_c_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2))*avg_coeff_view(jc, 3, jb); + }); + } +} + +template +void cell_avg_lib<double>( + const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const double* avg_coeff, double* avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, + const bool lacc); + +template +void cell_avg_lib<float>( + const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const float* avg_coeff, float* avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, + const bool lacc); diff --git a/src/interpolation/mo_lib_interpolation_scalar.hpp b/src/interpolation/mo_lib_interpolation_scalar.hpp index ca52349..d92cee0 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.hpp +++ b/src/interpolation/mo_lib_interpolation_scalar.hpp @@ -35,3 +35,45 @@ void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, const int nblks_v, const int cell_type, const bool lacc); + +template <typename T> +void edges2cells_scalar_lib( + const T* p_edge_in, const int* edge_idx, const int* edge_blk, + const T* coeff_int, T* p_cell_out, const int i_startblk, const int i_endblk, + const int 
i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_e, const int nblks_c, + const bool lacc); + +template <typename T, typename S> +void cells2verts_scalar_lib( + const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const S* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + +template <typename T, typename S> +void cells2verts_scalar_ri_lib( + const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const T* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + +template <typename T> +void verts2cells_scalar_lib( + const T* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const T* coeff_int, T* p_cell_out, const int nblks_c, const int npromz_c, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_v, const bool lacc); + +template <typename T> +void cell_avg_lib( + const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const T* avg_coeff, T* avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const bool lacc); \ No newline at end of file diff --git a/src/interpolation/mo_lib_interpolation_scalar_Dylan.cpp b/src/interpolation/mo_lib_interpolation_scalar_Dylan.cpp deleted file mode 100644 index 8b133c5..0000000 --- a/src/interpolation/mo_lib_interpolation_scalar_Dylan.cpp +++ /dev/null @@ -1,408 +0,0 @@ -// ICON -// -// 
--------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -#include <Kokkos_Core.hpp> -#include "mo_lib_loopindices.hpp" -#include "mo_lib_interpolation_scalar_Dylan.hpp" - -//------------------------------------------------------------------------ -// -//> -/// Computes interpolation from edges to cells -/// -/// Computes interpolation of scalar fields from velocity points to -/// cell centers via given interpolation weights -/// -template <typename T> -void edges2cells_scalar_lib( - const T* p_edge_in, const int* edge_idx, const int* edge_blk, - const T* coeff_int, T* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_e, const int nblks_c, - const bool lacc) -{ - // Wrap raw pointers in unmanaged Kokkos Views. 
- typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - - // edge based scalar input field, dim: (nproma,nlev,nblks_e) - UnmanagedConstT3D p_edge_in_view(p_edge_in, nproma, nlev, nblks_e); - - // line indices of edges of triangles, dim: (nproma,nblks_c, 3) - UnmanagedConstInt3D iidx_view(edge_idx, nproma, nblks_c, 3); // edge_idx_view - - // block indices of edges of triangles, dim: (nproma,nblks_c, 3) - UnmanagedConstInt3D iblk_view(edge_blk, nproma, nblks_c, 3); // edge_blk_view - - // coefficients for (area weighted) interpolation, dim: (nproma,3-cell_type,nblks_c) - UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 3, nblks_c); - - // cell based scalar output field, dim: (nproma,nlev,nblks_c) - UnmanagedT3D p_cell_out_view(p_cell_out, nproma, nlev, nblks_c); - - int i_startidx, i_endidx; - - for (int jb = i_startblk; jb < i_endblk; ++jb){ - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, - i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); - - Kokkos::parallel_for("edges2cells_scalar_lib_inner", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - p_cell_out_view(jc, jk, jb) = - coeff_int_view(jc, 1, jb)*p_edge_in_view(iidx_view(jc, jb, 1), jk, iblk_view(jc, jb, 1)) + - coeff_int_view(jc, 2, jb)*p_edge_in_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2)) + - coeff_int_view(jc, 3, jb)*p_edge_in_view(iidx_view(jc, jb, 3), jk, iblk_view(jc, jb, 3)); - }); - } - -} - -template -void edges2cells_scalar_lib<double>( - const double* p_edge_in, const int* edge_idx, const int* edge_blk, - const double* coeff_int, double* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int 
i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_e, const int nblks_c, - const bool lacc); - -template -void edges2cells_scalar_lib<float>( - const float* p_edge_in, const int* edge_idx, const int* edge_blk, - const float* coeff_int, float* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_e, const int nblks_c, - const bool lacc); - -//------------------------------------------------------------------------ -//> -//// Computes average of scalar fields from centers of cells to vertices. -//// -template <typename T, typename S> -void cells2verts_scalar_lib( - const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const S* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async) -{ - // Wrap raw pointers in unmanaged Kokkos Views. 
- typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<const S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstS3D; - typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - - // cell based scalar input field, dim: (nproma,nlev,nblks_c) - UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c); - - // line indices of cells around each vertex, dim: (nproma,nblks_v, 6) - UnmanagedConstInt3D iidx_view(vert_cell_idx, nproma, nblks_v, 6); // vert_cell_idx_view - - // block indices of cells around each vertex, dim: (nproma,nblks_v, 6) - UnmanagedConstInt3D iblk_view(vert_cell_blk, nproma, nblks_v, 6); // vert_cell_blk_view - - // coefficients for interpolation, dim: (nproma,9-cell_type,nblks_v) - UnmanagedConstS3D coeff_int_view(coeff_int, nproma, 9, nblks_v); - - // vertex based scalar output field, dim: (nproma,nlev,nblks_c) - UnmanagedS3D p_vert_out_view(p_vert_out, nproma, nlev, nblks_c); - - int i_startidx, i_endidx; - - for (int jb = i_startblk; jb < i_endblk; ++jb){ - - get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, - i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); - - Kokkos::parallel_for("cells2verts_scalar_lib", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jv) { - - p_vert_out_view(jv, jk, jb) = - coeff_int_view(jv, 0, jb)*p_cell_in_view(iidx_view(jv, jb, 0), jk, iblk_view(jv, jb, 0)) + - coeff_int_view(jv, 1, jb)*p_cell_in_view(iidx_view(jv, jb, 1), jk, iblk_view(jv, jb, 1)) + - coeff_int_view(jv, 2, jb)*p_cell_in_view(iidx_view(jv, jb, 2), jk, iblk_view(jv, jb, 2)) + - coeff_int_view(jv, 3, jb)*p_cell_in_view(iidx_view(jv, jb, 3), jk, iblk_view(jv, jb, 3)) + - coeff_int_view(jv, 4, jb)*p_cell_in_view(iidx_view(jv, 
jb, 4), jk, iblk_view(jv, jb, 4)) + - coeff_int_view(jv, 5, jb)*p_cell_in_view(iidx_view(jv, jb, 5), jk, iblk_view(jv, jb, 5)); - - }); - } -} - -template -void cells2verts_scalar_lib<double,double>( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - -template -void cells2verts_scalar_lib<double,float>( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - -template -void cells2verts_scalar_lib<float,float>( - const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - -//------------------------------------------------------------------------ - - -//> -/// Same as above, but provides output optionally in single precision and -/// assumes reversed index order of the output field in loop exchange mode -/// -template <typename T, typename S> -void cells2verts_scalar_ri_lib( - const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const T* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const 
int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async) -{ - // Wrap raw pointers in unmanaged Kokkos Views. - typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - - // cell based scalar input field, dim: (nproma,nlev,nblks_c) - UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c); - - // line indices of cells around each vertex, dim: (nproma,nblks_v, 6) - UnmanagedConstInt3D iidx_view(vert_cell_idx, nproma, nblks_v, 6); // vert_cell_idx_view - - // block indices of cells around each vertex, dim: (nproma,nblks_v, 6) - UnmanagedConstInt3D iblk_view(vert_cell_blk, nproma, nblks_v, 6); // vert_cell_blk_view - - // coefficients for interpolation, dim: (nproma,9-cell_type,nblks_v) - UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 9, nblks_v); - - // vertex based scalar output field, dim: (nproma,nlev,nblks_c) - UnmanagedS3D p_vert_out_view(p_vert_out, nproma, nlev, nblks_c); - - int i_startidx, i_endidx; - - for (int jb = i_startblk; jb < i_endblk; ++jb){ - - get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, - i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); - - Kokkos::parallel_for("cells2verts_scalar_ri_lib", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jv) { - - p_vert_out_view(jv, jk, jb) = - coeff_int_view(jv, 0, jb)*p_cell_in_view(iidx_view(jv, jb, 0), jk, iblk_view(jv, jb, 0)) + - coeff_int_view(jv, 1, jb)*p_cell_in_view(iidx_view(jv, jb, 1), jk, iblk_view(jv, jb, 1)) + - coeff_int_view(jv, 2, jb)*p_cell_in_view(iidx_view(jv, jb, 2), jk, iblk_view(jv, jb, 2)) + - coeff_int_view(jv, 3, jb)*p_cell_in_view(iidx_view(jv, jb, 3), jk, iblk_view(jv, jb, 3)) + - coeff_int_view(jv, 4, 
jb)*p_cell_in_view(iidx_view(jv, jb, 4), jk, iblk_view(jv, jb, 4)) + - coeff_int_view(jv, 5, jb)*p_cell_in_view(iidx_view(jv, jb, 5), jk, iblk_view(jv, jb, 5)); - - }); - } - -} - -template -void cells2verts_scalar_ri_lib<double,double>( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - -template -void cells2verts_scalar_ri_lib<double,float>( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const double* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - -template -void cells2verts_scalar_ri_lib<float,float>( - const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - - -//> -/// Computes average of scalar fields from vertices to centers of cells. -/// -template <typename T> -void verts2cells_scalar_lib( - const T* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, - const T* coeff_int, T* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, - const int nblks_v, const bool lacc) -{ - // Wrap raw pointers in unmanaged Kokkos Views. 
- typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - - // cell based scalar input field, dim: (nproma,nlev,nblks_v) - UnmanagedConstT3D p_vert_in_view(p_vert_in, nproma, nlev, nblks_v); - - // line indices of vertices of triangles, dim: (nproma,nblks_c, 3) - UnmanagedConstInt3D iidx_view(cell_index_idx, nproma, nblks_c, 3); // cell_vertex_idx - - // block indices of vertices of triangles, dim: (nproma,nblks_c, 3) - UnmanagedConstInt3D iblk_view(cell_vertex_blk, nproma, nblks_c, 3); // cell_vertex_blk - - // coefficients for interpolation, dim: (nproma,3-cell_type,nblks_c) - UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 3, nblks_c); - - // vertex based scalar output field, dim: (nproma,nlev,nblks_c) - UnmanagedT3D p_cell_out_view(p_cell_out, nproma, nlev, nblks_c); - - for (int jb = 1; jb<nblks_c; ++jb){ - - int nlen = (jb != nblks_c ? 
nproma: npromz_c); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, 0}, {elev + 1, nlen + 1}); - - Kokkos::parallel_for("cell_avg_lib_inner", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - - p_cell_out_view(jc, jk, jb) = - coeff_int_view(jc, 0, jb)*p_vert_in_view(iidx_view(jc, jb, 0), jk, iblk_view(jc, jb, 0)) + - coeff_int_view(jc, 1, jb)*p_vert_in_view(iidx_view(jc, jb, 1), jk, iblk_view(jc, jb, 1)) + - coeff_int_view(jc, 2, jb)*p_vert_in_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2)); - - }); - } -} - -template -void verts2cells_scalar_lib<double>( - const double* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, - const double* coeff_int, double* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, - const int nblks_v, const bool lacc); - -template -void verts2cells_scalar_lib<float>( - const float* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, - const float* coeff_int, float* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, - const int nblks_v, const bool lacc); - - -//------------------------------------------------------------------------- -// -// -//> -/// Computes the average of a cell-based variable. -/// -/// Computes the average of a cell-based variable -/// over its original location and the neighboring triangles. 
-/// Version with variable weighting coefficients, computed such that -/// linear horizontal gradients are not aliased into a checkerboard noise -/// input: lives on centers of triangles -/// output: lives on centers of triangles -/// - -template <typename T> -void cell_avg_lib( - const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const T* avg_coeff, T* avg_psi_c, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const bool lacc) -{ - // Wrap raw pointers in unmanaged Kokkos Views. - typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - - // cell based variable before averaging, dim: (nproma,nlev,nblks_c) - UnmanagedConstT3D psi_c_view(psi_c, nproma, nlev, nblks_c); - // line indices of triangles next to each cell, dim: (nproma,nblks_c, 3) - UnmanagedConstInt3D iidx_view(cell_neighbor_idx, nproma, nblks_c, 3); // cell_neighbour_idx - // block indices of triangles next to each cell, dim: (nproma,nblks_c, 3) - UnmanagedConstInt3D iblk_view(cell_neighbor_blk, nproma, nblks_c, 3); // cell_neighbour_blk - // averaging coefficients, dim: (nproma,nlev,nblks_c) - UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma, nlev, nblks_c); - - // cell based variable after averaging, dim: (nproma,nlev,nblks_c) - UnmanagedT3D avg_psi_c_view(avg_psi_c, nproma, nlev, nblks_c); - - int i_startidx, i_endidx; - - for (int jb = i_startblk; jb<i_endblk; ++jb){ - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, - i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); - - 
Kokkos::parallel_for("cell_avg_lib_inner", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - // calculate the weighted average - - avg_psi_c_view(jc, jk, jb) = - psi_c_view(jc, jk, jb)*avg_coeff_view(jc, 0, jb) + - psi_c_view(iidx_view(jc, jb, 0), jk, iblk_view(jc, jb, 0))*avg_coeff_view(jc, 1, jb) + - psi_c_view(iidx_view(jc, jb, 1), jk, iblk_view(jc, jb, 1))*avg_coeff_view(jc, 2, jb) + - psi_c_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2))*avg_coeff_view(jc, 3, jb); - }); - } -} - -template -void cell_avg_lib<double>( - const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const double* avg_coeff, double* avg_psi_c, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, - const bool lacc); - -template -void cell_avg_lib<float>( - const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const float* avg_coeff, float* avg_psi_c, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, - const bool lacc); \ No newline at end of file diff --git a/src/interpolation/mo_lib_interpolation_scalar_Dylan.hpp b/src/interpolation/mo_lib_interpolation_scalar_Dylan.hpp deleted file mode 100644 index 887dd4a..0000000 --- a/src/interpolation/mo_lib_interpolation_scalar_Dylan.hpp +++ /dev/null @@ -1,54 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -#pragma once - -template <typename T> -void edges2cells_scalar_lib( - 
const T* p_edge_in, const int* edge_idx, const int* edge_blk, - const T* coeff_int, T* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_e, const int nblks_c, - const bool lacc); - -template <typename T, typename S> -void cells2verts_scalar_lib( - const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const S* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - -template <typename T, typename S> -void cells2verts_scalar_ri_lib( - const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const T* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - -template <typename T> -void verts2cells_scalar_lib( - const T* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, - const T* coeff_int, T* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, - const int nblks_v, const bool lacc); - -template <typename T> -void cell_avg_lib( - const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const T* avg_coeff, T* avg_psi_c, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const bool lacc); \ No newline at end of file diff --git a/src/interpolation/mo_lib_interpolation_scalar_bindings_Dylan.cpp 
b/src/interpolation/mo_lib_interpolation_scalar_bindings_Dylan.cpp deleted file mode 100644 index dc3b509..0000000 --- a/src/interpolation/mo_lib_interpolation_scalar_bindings_Dylan.cpp +++ /dev/null @@ -1,219 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -#include "mo_lib_interpolation_scalar_bindings_Dylan.h" -#include "mo_lib_interpolation_scalar_Dylan.hpp" - -void edges2cells_scalar_lib_dp( - const double* p_edge_in, const int* edge_idx, const int* edge_blk, - const double* coeff_int, double* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_e, const int nblks_c, - const bool lacc) -{ - edges2cells_scalar_lib<double>( - p_edge_in, edge_idx, edge_blk, - coeff_int, p_cell_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_e, nblks_c, - lacc); -} - -void edges2cells_scalar_lib_sp( - const float* p_edge_in, const int* edge_idx, const int* edge_blk, - const float* coeff_int, float* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_e, const int nblks_c, - const bool lacc) -{ - - edges2cells_scalar_lib<float>( - p_edge_in, edge_idx, edge_blk, - coeff_int, p_cell_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_e, nblks_c, - lacc); -} - -///////////////////////////////////////////// - -void cells2verts_scalar_lib_dp( - const double* p_cell_in, const int* 
vert_cell_idx, const int* vert_cell_blk, - const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async) -{ - cells2verts_scalar_lib<double,double>( - p_cell_in, vert_cell_idx, vert_cell_blk, - coeff_int, p_vert_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_c, nblks_v, - lacc, acc_async); -} - -void cells2verts_scalar_lib_dp2sp( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async) -{ - cells2verts_scalar_lib<double,float>( - p_cell_in, vert_cell_idx, vert_cell_blk, - coeff_int, p_vert_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_c, nblks_v, - lacc, acc_async); -} - -void cells2verts_scalar_lib_sp( - const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async) -{ - cells2verts_scalar_lib<float,float>( - p_cell_in, vert_cell_idx, vert_cell_blk, - coeff_int, p_vert_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_c, nblks_v, - lacc, acc_async); -} - -///////////////////////////////////////////// - -void cells2verts_scalar_ri_lib_dp( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, 
- const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async) -{ - cells2verts_scalar_ri_lib<double,double>( - p_cell_in, vert_cell_idx, vert_cell_blk, - coeff_int, p_vert_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_c, nblks_v, - lacc, acc_async); -} - -void cells2verts_scalar_ri_lib_dp2sp( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const double* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async) -{ - cells2verts_scalar_ri_lib<double,float>( - p_cell_in, vert_cell_idx, vert_cell_blk, - coeff_int, p_vert_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_c, nblks_v, - lacc, acc_async); -} - -void cells2verts_scalar_ri_lib_sp( - const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async) -{ - cells2verts_scalar_ri_lib<float,float>( - p_cell_in, vert_cell_idx, vert_cell_blk, - coeff_int, p_vert_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_c, nblks_v, - lacc, acc_async); -} - -///////////////////////////////////////////// - -void verts2cells_scalar_lib_dp( - const double* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, - const double* 
coeff_int, double* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, - const int nblks_v, const bool lacc) -{ - verts2cells_scalar_lib<double>( - p_vert_in, cell_index_idx, cell_vertex_blk, - coeff_int, p_cell_out, nblks_c, npromz_c, - slev, elev, nproma, nlev, - nblks_v, lacc); -} - -void verts2cells_scalar_lib_sp( - const float* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, - const float* coeff_int, float* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, - const int nblks_v, const bool lacc) -{ - verts2cells_scalar_lib<float>( - p_vert_in, cell_index_idx, cell_vertex_blk, - coeff_int, p_cell_out, nblks_c, npromz_c, - slev, elev, nproma, nlev, - nblks_v, lacc); -} - -///////////////////////////////////////////// - -void cell_avg_lib_dp( - const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const double* avg_coeff, double* avg_psi_c, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, - const bool lacc) -{ - cell_avg_lib<double>( - psi_c, cell_neighbor_idx, cell_neighbor_blk, - avg_coeff, avg_psi_c, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_c, - lacc); -} - -void cell_avg_lib_sp( - const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const float* avg_coeff, float* avg_psi_c, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, - const bool lacc) -{ - cell_avg_lib<float>( - psi_c, cell_neighbor_idx, cell_neighbor_blk, - avg_coeff, avg_psi_c, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_c, - lacc); -} - \ No 
newline at end of file diff --git a/src/interpolation/mo_lib_interpolation_scalar_bindings_Dylan.h b/src/interpolation/mo_lib_interpolation_scalar_bindings_Dylan.h deleted file mode 100644 index 4cb399b..0000000 --- a/src/interpolation/mo_lib_interpolation_scalar_bindings_Dylan.h +++ /dev/null @@ -1,109 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -#pragma once - -void edges2cells_scalar_lib_dp( - const double* p_edge_in, const int* edge_idx, const int* edge_blk, - const double* coeff_int, double* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_e, const int nblks_c, - const bool lacc); -void edges2cells_scalar_lib_sp( - const float* p_edge_in, const int* edge_idx, const int* edge_blk, - const float* coeff_int, float* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_e, const int nblks_c, - const bool lacc); - -///////////////////////////////////////////// - -void cells2verts_scalar_lib_dp( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); -void cells2verts_scalar_lib_dp2sp( - const double* p_cell_in, const int* vert_cell_idx, const 
int* vert_cell_blk, - const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); -void cells2verts_scalar_lib_sp( - const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - -///////////////////////////////////////////// - -void cells2verts_scalar_ri_lib_dp( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - -void cells2verts_scalar_ri_lib_dp2sp( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const double* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - -void cells2verts_scalar_ri_lib_sp( - const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - 
-///////////////////////////////////////////// - -void verts2cells_scalar_lib_dp( - const double* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, - const double* coeff_int, double* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, - const int nblks_v, const bool lacc); - -void verts2cells_scalar_lib_sp( - const float* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, - const float* coeff_int, float* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, - const int nblks_v, const bool lacc); - -///////////////////////////////////////////// - -void cell_avg_lib_dp( - const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const double* avg_coeff, double* avg_psi_c, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, - const bool lacc); -void cell_avg_lib_sp( - const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const float* avg_coeff, float* avg_psi_c, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, - const bool lacc); - \ No newline at end of file -- GitLab From d9de153386a5b2dcce632c086c84a987c2c1fe66 Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: Wed, 26 Feb 2025 08:57:36 +0100 Subject: [PATCH 11/36] Fix CMakeLists --- src/interpolation/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index eade738..1051516 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -12,12 +12,12 @@ add_library( iconmath-interpolation 
mo_lib_interpolation_scalar.F90 + mo_lib_interpolation_scalar.cpp mo_lib_interpolation_vector.F90 mo_lib_interpolation_vector.cpp mo_lib_intp_rbf.F90 mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp - interpolation_bindings.h interpolation_bindings.cpp ) -- GitLab From fe2f4edf159cd96d3dd6d2b0bb2ba3b64cb48641 Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: Wed, 26 Feb 2025 08:58:15 +0100 Subject: [PATCH 12/36] Reformat mo_lib_interpolation_scalar, and fix end indexes with +1 in loops --- .../mo_lib_interpolation_scalar.cpp | 366 ++++++++++-------- 1 file changed, 199 insertions(+), 167 deletions(-) diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index 7f91246..e5369f6 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -14,6 +14,19 @@ #include "mo_lib_loopindices.hpp" #include "mo_lib_interpolation_scalar.hpp" +//----------------------------------------------------------------------- +// +// ! averaging and interpolation routines and +// ! routines needed to compute the coefficients therein +// +//----------------------------------------------------------------------- + +//----------------------------------------------------------------------- +//> +/// Performs average of scalar fields from vertices to velocity points. +/// +/// The coefficients are given by coeff_int. 
+/// template <typename T> void verts2edges_scalar_lib(const T* p_vertex_in, const int* edge_vertex_idx, const int* edge_vertex_blk, @@ -34,7 +47,7 @@ void verts2edges_scalar_lib(const T* p_vertex_in, UnmanagedConstT3D coeff_int_view (coeff_int, nproma,2,nblks_e); UnmanagedT3D p_edge_out_view (p_edge_out, nproma,nlev,nblks_e); - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, @@ -42,7 +55,7 @@ void verts2edges_scalar_lib(const T* p_vertex_in, i_startidx, i_endidx); Kokkos::parallel_for("verts2edges_scalar", Kokkos::MDRangePolicy<Kokkos::Rank<2>> - ({slev, i_startidx}, {elev, i_endidx}), + ({slev, i_startidx}, {elev + 1, i_endidx + 1}), KOKKOS_LAMBDA(const int jk, const int je) { p_edge_out_view(je, jk, jb) = coeff_int_view(je, 0, jb) * @@ -54,6 +67,13 @@ void verts2edges_scalar_lib(const T* p_vertex_in, } } +//------------------------------------------------------------------------ +//> +/// Computes average of scalar fields from centers of triangular faces to velocity points. +/// +/// Computes average of scalar fields from centers of triangular faces to +/// velocity points. 
+/// template <typename T, typename S> void cells2edges_scalar_lib(const S* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, const T* coeff_int, T* p_edge_out, const int* i_startblk_in, @@ -80,14 +100,14 @@ void cells2edges_scalar_lib(const S* p_cell_in, const int* edge_cell_idx, const i_startblk = i_startblk_in[0]; i_endblk = i_endblk_in[0]; - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { int i_startidx, i_endidx; get_indices_e_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::parallel_for("cells2edges_scalar", Kokkos::MDRangePolicy<Kokkos::Rank<2>> - ({slev, i_startidx}, {elev, i_endidx}), + ({slev, i_startidx}, {elev + 1, i_endidx + 1}), KOKKOS_LAMBDA(const int jk, const int je) { if (iidx_view(je, jb, 0) >= 0 && iblk_view(je, jb, 0) >= 0){ @@ -109,14 +129,14 @@ void cells2edges_scalar_lib(const S* p_cell_in, const int* edge_cell_idx, const i_startblk = i_startblk_in[1]; i_endblk = i_endblk_in[1]; - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { int i_startidx, i_endidx; get_indices_e_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::parallel_for("cells2edges_scalar", Kokkos::MDRangePolicy<Kokkos::Rank<2>> - ({slev, i_startidx}, {elev, i_endidx}), + ({slev, i_startidx}, {elev + 1, i_endidx + 1}), KOKKOS_LAMBDA(const int jk, const int je) { p_edge_out_view(je, jk, jb) = coeff_int_view(je, 0, jb) * @@ -130,6 +150,13 @@ void cells2edges_scalar_lib(const S* p_cell_in, const int* edge_cell_idx, const } +//------------------------------------------------------------------------ +//> +/// Computes average of scalar fields from velocity points to centers of dual faces. +/// +/// Computes average of scalar fields from velocity points to +/// centers of dual faces. 
+/// template <typename T> void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, const T* v_int, T* p_vert_out, const int i_startblk, const int i_endblk, @@ -148,7 +175,7 @@ void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const UnmanagedConstT3D v_int_view (v_int, nproma,cell_type,nblks_v); UnmanagedT3D p_vert_out_view (p_vert_out, nproma,nlev,nblks_v); - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { int i_startidx, i_endidx; get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, @@ -156,7 +183,7 @@ void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const i_startidx, i_endidx); Kokkos::parallel_for("edges2verts_scalar", Kokkos::MDRangePolicy<Kokkos::Rank<2>> - ({slev, i_startidx}, {elev, i_endidx}), + ({slev, i_startidx}, {elev + 1, i_endidx + 1}), KOKKOS_LAMBDA(const int jk, const int jv) { p_vert_out_view(jv, jk, jb) = v_int_view(jv, 0, jb)*p_edge_in_view(iidx_view(jv, jb, 0), jk, iblk_view(jv, jb, 0)) @@ -171,66 +198,7 @@ void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const } -template -void verts2edges_scalar_lib<double>(const double* p_vertex_in, - const int* edge_vertex_idx, const int* edge_vertex_blk, - const double* coeff_int, - double* p_edge_out, - const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, - const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc); - -template -void verts2edges_scalar_lib<float>(const float* p_vertex_in, - const int* edge_vertex_idx, const int* edge_vertex_blk, - const float* coeff_int, - float* p_edge_out, - const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, - const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc); - -template -void 
cells2edges_scalar_lib<double, double>(const double* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const double* coeff_int, double* p_edge_out, const int* i_startblk_in, - const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, - const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, - const bool l_limited_area, const bool lfill_latbc, const bool lacc); - -template -void cells2edges_scalar_lib<float, float>(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const float* coeff_int, float* p_edge_out, const int* i_startblk_in, - const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, - const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, - const bool l_limited_area, const bool lfill_latbc, const bool lacc); - -template -void cells2edges_scalar_lib<double, float>(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const double* coeff_int, double* p_edge_out, const int* i_startblk_in, - const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, - const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, - const bool l_limited_area, const bool lfill_latbc, const bool lacc); - - -template -void edges2verts_scalar_lib<double> (const double* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, - const double* v_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, - const int elev, const int nproma, const int nlev, const int nblks_e, - const int nblks_v, const int cell_type, const bool lacc); - -template -void edges2verts_scalar_lib <float> (const float* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, - const float* v_int, float* p_vert_out, 
const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, - const int elev, const int nproma, const int nlev, const int nblks_e, - const int nblks_v, const int cell_type, const bool lacc); - - //------------------------------------------------------------------------ -// //> /// Computes interpolation from edges to cells /// @@ -268,7 +236,7 @@ void edges2cells_scalar_lib( int i_startidx, i_endidx; - for (int jb = i_startblk; jb < i_endblk; ++jb){ + for (int jb = i_startblk; jb < i_endblk + 1; ++jb){ get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); @@ -286,28 +254,10 @@ void edges2cells_scalar_lib( } -template -void edges2cells_scalar_lib<double>( - const double* p_edge_in, const int* edge_idx, const int* edge_blk, - const double* coeff_int, double* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_e, const int nblks_c, - const bool lacc); - -template -void edges2cells_scalar_lib<float>( - const float* p_edge_in, const int* edge_idx, const int* edge_blk, - const float* coeff_int, float* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_e, const int nblks_c, - const bool lacc); - //------------------------------------------------------------------------ //> -//// Computes average of scalar fields from centers of cells to vertices. -//// +/// Computes average of scalar fields from centers of cells to vertices. 
+/// template <typename T, typename S> void cells2verts_scalar_lib( const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, @@ -340,7 +290,7 @@ void cells2verts_scalar_lib( int i_startidx, i_endidx; - for (int jb = i_startblk; jb < i_endblk; ++jb){ + for (int jb = i_startblk; jb < i_endblk + 1; ++jb){ get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); @@ -363,36 +313,7 @@ void cells2verts_scalar_lib( } } -template -void cells2verts_scalar_lib<double,double>( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - -template -void cells2verts_scalar_lib<double,float>( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - -template -void cells2verts_scalar_lib<float,float>( - const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - -//------------------------------------------------------------------------ - - +//------------------------------------------------------------------------- //> /// Same as above, but provides output optionally in single precision and /// assumes reversed 
index order of the output field in loop exchange mode @@ -428,7 +349,7 @@ void cells2verts_scalar_ri_lib( int i_startidx, i_endidx; - for (int jb = i_startblk; jb < i_endblk; ++jb){ + for (int jb = i_startblk; jb < i_endblk + 1; ++jb){ get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); @@ -452,34 +373,7 @@ void cells2verts_scalar_ri_lib( } -template -void cells2verts_scalar_ri_lib<double,double>( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - -template -void cells2verts_scalar_ri_lib<double,float>( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const double* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - -template -void cells2verts_scalar_ri_lib<float,float>( - const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - - +//------------------------------------------------------------------------- //> /// Computes average of scalar fields from vertices to centers of cells. 
/// @@ -510,7 +404,7 @@ void verts2cells_scalar_lib( // vertex based scalar output field, dim: (nproma,nlev,nblks_c) UnmanagedT3D p_cell_out_view(p_cell_out, nproma, nlev, nblks_c); - for (int jb = 1; jb<nblks_c; ++jb){ + for (int jb = 1; jb<nblks_c + 1; ++jb){ int nlen = (jb != nblks_c ? nproma: npromz_c); @@ -529,24 +423,8 @@ void verts2cells_scalar_lib( } } -template -void verts2cells_scalar_lib<double>( - const double* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, - const double* coeff_int, double* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, - const int nblks_v, const bool lacc); - -template -void verts2cells_scalar_lib<float>( - const float* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, - const float* coeff_int, float* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, - const int nblks_v, const bool lacc); - //------------------------------------------------------------------------- -// -// //> /// Computes the average of a cell-based variable. 
/// @@ -557,7 +435,6 @@ void verts2cells_scalar_lib<float>( /// input: lives on centers of triangles /// output: lives on centers of triangles /// - template <typename T> void cell_avg_lib( const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, @@ -585,7 +462,7 @@ void cell_avg_lib( int i_startidx, i_endidx; - for (int jb = i_startblk; jb<i_endblk; ++jb){ + for (int jb = i_startblk; jb<i_endblk + 1; ++jb){ get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); @@ -605,6 +482,161 @@ void cell_avg_lib( } } +//----------------------------------------------------------------------- +// +// Explicit Instantiations +// +//----------------------------------------------------------------------- + +template +void verts2edges_scalar_lib<double>(const double* p_vertex_in, + const int* edge_vertex_idx, const int* edge_vertex_blk, + const double* coeff_int, + double* p_edge_out, + const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc); + +template +void verts2edges_scalar_lib<float>(const float* p_vertex_in, + const int* edge_vertex_idx, const int* edge_vertex_blk, + const float* coeff_int, + float* p_edge_out, + const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc); + +template +void cells2edges_scalar_lib<double, double>(const double* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const double* coeff_int, double* p_edge_out, const int* i_startblk_in, + const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool 
l_limited_area, const bool lfill_latbc, const bool lacc); + +template +void cells2edges_scalar_lib<float, float>(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const float* coeff_int, float* p_edge_out, const int* i_startblk_in, + const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc); + +template +void cells2edges_scalar_lib<double, float>(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const double* coeff_int, double* p_edge_out, const int* i_startblk_in, + const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc); + + +template +void edges2verts_scalar_lib<double> (const double* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, + const double* v_int, double* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, const int nblks_e, + const int nblks_v, const int cell_type, const bool lacc); + +template +void edges2verts_scalar_lib <float> (const float* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, + const float* v_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, const int nblks_e, + const int nblks_v, const int cell_type, const bool lacc); + + +template +void edges2cells_scalar_lib<double>( + const double* p_edge_in, const int* edge_idx, const int* edge_blk, + const double* coeff_int, double* p_cell_out, 
const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_e, const int nblks_c, + const bool lacc); + +template +void edges2cells_scalar_lib<float>( + const float* p_edge_in, const int* edge_idx, const int* edge_blk, + const float* coeff_int, float* p_cell_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_e, const int nblks_c, + const bool lacc); + + +template +void cells2verts_scalar_lib<double,double>( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + +template +void cells2verts_scalar_lib<double,float>( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + +template +void cells2verts_scalar_lib<float,float>( + const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + + +template +void cells2verts_scalar_ri_lib<double,double>( + const double* p_cell_in, const int* vert_cell_idx, const int* 
vert_cell_blk, + const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + +template +void cells2verts_scalar_ri_lib<double,float>( + const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const double* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + +template +void cells2verts_scalar_ri_lib<float,float>( + const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + + + +template +void verts2cells_scalar_lib<double>( + const double* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const double* coeff_int, double* p_cell_out, const int nblks_c, const int npromz_c, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_v, const bool lacc); + +template +void verts2cells_scalar_lib<float>( + const float* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const float* coeff_int, float* p_cell_out, const int nblks_c, const int npromz_c, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_v, const bool lacc); + + template void cell_avg_lib<double>( const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, -- GitLab From b8a1df4691d058c3a9ecfc66c497c52b70c14464 Mon Sep 17 00:00:00 
2001 From: Dylan Kierans <kierans@dkrz.de> Date: Wed, 26 Feb 2025 08:58:45 +0100 Subject: [PATCH 13/36] Use +1 for end index in mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib --- .../mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp index c9b776e..0a41fe8 100644 --- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp @@ -101,7 +101,7 @@ void rbf_vec_interpol_vertex_lib( int i_startidx; // start index int i_endidx; // end index - for (jb=i_startblk; jb <= i_endblk; ++jb){ + for (jb=i_startblk; jb < i_endblk + 1; ++jb){ get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); -- GitLab From 90989fd088dee68a5a5452cd488b6ddcefade465 Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: Wed, 26 Feb 2025 09:13:18 +0100 Subject: [PATCH 14/36] Adding first test for mo_lib_interpolation_scalar::verts2edges_scalar_lib --- test/c/CMakeLists.txt | 1 + test/c/test_interpolation_scalar.cpp | 138 +++++++++++++++++++++++++++ 2 files changed, 139 insertions(+) create mode 100644 test/c/test_interpolation_scalar.cpp diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index 13c5dfe..16371d0 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -26,6 +26,7 @@ set(SOURCES test_tdma_solver.cpp test_interpolation_vector.cpp test_intp_rbf.cpp + test_interpolation_scalar.cpp ) # Create the test executable from your test files, including main.cpp. 
add_executable(iconmath_test_c ${SOURCES}) diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp new file mode 100644 index 0000000..efffaef --- /dev/null +++ b/test/c/test_interpolation_scalar.cpp @@ -0,0 +1,138 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include <gtest/gtest.h> +#include <Kokkos_Core.hpp> +#include <vector> +#include "mo_lib_interpolation_scalar.hpp" + +// Free-function helpers for 3D and 4D array sizes (assumed column-major) +template<typename T> +size_t num_elements_3d(int d1, int d2, int d3) { + return static_cast<size_t>(d1) * d2 * d3; +} + +template<typename T> +size_t num_elements_4d(int d1, int d2, int d3, int d4) { + return static_cast<size_t>(d1) * d2 * d3 * d4; +} + +// Define a helper struct that holds the two types. +template<typename InT, typename OutT> +struct MixedPrecision { + using in_type = InT; + using out_type = OutT; +}; + +// Define a helper struct that holds the one type. +template<typename T> +struct FullPrecision { + using in_type = T; +}; + +// Define the list of type pairs we want to test. +typedef ::testing::Types< MixedPrecision<double, double>, + MixedPrecision<double, float>, + MixedPrecision<float, float> > MixedTypes; + +// Define the list of type pairs we want to test. +typedef ::testing::Types< FullPrecision<double>, + FullPrecision<float > > FullTypes; + + +// Define a typed test fixture. +template <typename TypePair> +class Verts2edgesScalarLibFullTestFixture : public ::testing::Test { +public: + using InType = typename TypePair::in_type; + using OutType = typename TypePair::in_type; + + // Constant dimensions. 
+ static constexpr int nproma = 16; // inner loop length + static constexpr int nlev = 7; // number of vertical levels + static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) + static constexpr int nblks_v = 2; // number of vertex blocks (for rbf arrays and outputs) + static constexpr int rbf_vec_dim = 6; // fixed dimension for rbf vector (stencil points) + + // Parameter values. + int i_startblk = 0; + int i_endblk = 2; // Test blocks [0, 1] + int i_startidx_in = 2; + int i_endidx_in = nproma - 3; // Full range: 0 .. nproma-1 + int slev = 1; + int elev = nlev - 1; // Full vertical range (0 .. nlev-1) + bool lacc = false; // Not using ACC-specific behavior. + bool acc_async = false; // No asynchronous execution. + + // Arrays stored in std::vector. + std::vector<InType> p_vertex_in; // Dimensions: (nproma, nlev, nblks_v) + std::vector<int> edge_vertex_idx; // Dimensions: (nproma, nblks_e, 4) + std::vector<int> edge_vertex_blk; // Dimensions: (nproma, nblks_e, 4) + std::vector<InType> coeff_int; // Dimensions: (nproma, 2, nblks_e) + std::vector<InType> p_edge_out; // Dimensions: (nproma, nlev, nblks_e) + + Verts2edgesScalarLibFullTestFixture() { + // Allocate and initialize inputs. + p_vertex_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_v), static_cast<InType>(1)); + edge_vertex_idx.resize(num_elements_3d<int>(nproma, nblks_e, 4), 1); + edge_vertex_blk.resize(num_elements_3d<int>(nproma, nblks_e, 4), 0); + coeff_int.resize(num_elements_3d<InType>(nproma, 2, nblks_v), static_cast<InType>(1)); + + // Allocate output arrays and initialize to zero. 
+ p_edge_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_e), static_cast<OutType>(0)); + } +}; + +TYPED_TEST_SUITE(Verts2edgesScalarLibFullTestFixture, FullTypes); + +TYPED_TEST(Verts2edgesScalarLibFullTestFixture, BasicTest) { + using InType = typename TestFixture::InType; + using OutType = typename TestFixture::InType; + + // Copy output so it can be restored after + auto tmp_p_edge_out = this->p_edge_out; + + // Call the function + verts2edges_scalar_lib<InType>( + this->p_vertex_in.data(), + this->edge_vertex_idx.data(), + this->edge_vertex_blk.data(), + this->coeff_int.data(), + this->p_edge_out.data(), + this->i_startblk, + this->i_endblk, + this->i_startidx_in, + this->i_endidx_in, + this->slev, + this->elev, + this->nproma, + this->nlev, + Verts2edgesScalarLibFullTestFixture< TypeParam >::nblks_v, + Verts2edgesScalarLibFullTestFixture< TypeParam >::nblks_e, + this->lacc); + + // Check the outputs only for blocks in the range + // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev; level < this->elev; ++level) { + for (int i = this->i_startidx_in; i < this->i_endidx_in; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and each edge has 2 coefficient terms, expect 2. 
+ EXPECT_NEAR(this->p_edge_out[idx], static_cast<OutType>(2), static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + } + } + } + + // Restore output + this->p_edge_out = tmp_p_edge_out; +} -- GitLab From ca07f9bf124b0ef0c557e084aa7c366601a6d751 Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: Wed, 26 Feb 2025 09:15:56 +0100 Subject: [PATCH 15/36] Restoring proper bounds for test --- test/c/test_interpolation_scalar.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp index efffaef..9a47f76 100644 --- a/test/c/test_interpolation_scalar.cpp +++ b/test/c/test_interpolation_scalar.cpp @@ -64,11 +64,11 @@ public: // Parameter values. int i_startblk = 0; - int i_endblk = 2; // Test blocks [0, 1] + int i_endblk = 1; // Test blocks [0, 1] int i_startidx_in = 2; - int i_endidx_in = nproma - 3; // Full range: 0 .. nproma-1 + int i_endidx_in = nproma - 3; // Partial range: 2 .. nproma-3 int slev = 1; - int elev = nlev - 1; // Full vertical range (0 .. nlev-1) + int elev = nlev - 1; // Partial vertical range (1 .. nlev-1) bool lacc = false; // Not using ACC-specific behavior. bool acc_async = false; // No asynchronous execution. 
-- GitLab From 3be29336ca0b63e837dbbd1b1f0d52940e44314d Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: Wed, 26 Feb 2025 15:12:49 +0100 Subject: [PATCH 16/36] Formatting --- .../mo_lib_interpolation_scalar.cpp | 10 ++-- .../mo_lib_interpolation_scalar.hpp | 59 +++++++++---------- 2 files changed, 32 insertions(+), 37 deletions(-) diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index e5369f6..a9b3f46 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -187,11 +187,11 @@ void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const KOKKOS_LAMBDA(const int jk, const int jv) { p_vert_out_view(jv, jk, jb) = v_int_view(jv, 0, jb)*p_edge_in_view(iidx_view(jv, jb, 0), jk, iblk_view(jv, jb, 0)) - + v_int_view(jv, 1, jb)*p_edge_in_view(iidx_view(jv, jb, 1), jk, iblk_view(jv, jb, 1)) - + v_int_view(jv, 2, jb)*p_edge_in_view(iidx_view(jv, jb, 2), jk, iblk_view(jv, jb, 2)) - + v_int_view(jv, 3, jb)*p_edge_in_view(iidx_view(jv, jb, 3), jk, iblk_view(jv, jb, 3)) - + v_int_view(jv, 4, jb)*p_edge_in_view(iidx_view(jv, jb, 4), jk, iblk_view(jv, jb, 4)) - + v_int_view(jv, 5, jb)*p_edge_in_view(iidx_view(jv, jb, 5), jk, iblk_view(jv, jb, 5)); + + v_int_view(jv, 1, jb)*p_edge_in_view(iidx_view(jv, jb, 1), jk, iblk_view(jv, jb, 1)) + + v_int_view(jv, 2, jb)*p_edge_in_view(iidx_view(jv, jb, 2), jk, iblk_view(jv, jb, 2)) + + v_int_view(jv, 3, jb)*p_edge_in_view(iidx_view(jv, jb, 3), jk, iblk_view(jv, jb, 3)) + + v_int_view(jv, 4, jb)*p_edge_in_view(iidx_view(jv, jb, 4), jk, iblk_view(jv, jb, 4)) + + v_int_view(jv, 5, jb)*p_edge_in_view(iidx_view(jv, jb, 5), jk, iblk_view(jv, jb, 5)); }); Kokkos::fence(); } diff --git a/src/interpolation/mo_lib_interpolation_scalar.hpp b/src/interpolation/mo_lib_interpolation_scalar.hpp index d92cee0..95febf1 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.hpp +++ 
b/src/interpolation/mo_lib_interpolation_scalar.hpp @@ -37,43 +37,38 @@ void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const const int nblks_v, const int cell_type, const bool lacc); template <typename T> -void edges2cells_scalar_lib( - const T* p_edge_in, const int* edge_idx, const int* edge_blk, - const T* coeff_int, T* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_e, const int nblks_c, - const bool lacc); +void edges2cells_scalar_lib(const T* p_edge_in, const int* edge_idx, const int* edge_blk, + const T* coeff_int, T* p_cell_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_e, const int nblks_c, + const bool lacc); template <typename T, typename S> -void cells2verts_scalar_lib( - const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const S* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); +void cells2verts_scalar_lib(const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const S* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); template <typename T, typename S> -void cells2verts_scalar_ri_lib( - const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const T* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, 
- const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); +void cells2verts_scalar_ri_lib(const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const T* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); template <typename T> -void verts2cells_scalar_lib( - const T* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, - const T* coeff_int, T* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, - const int nblks_v, const bool lacc); +void verts2cells_scalar_lib(const T* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const T* coeff_int, T* p_cell_out, const int nblks_c, const int npromz_c, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_v, const bool lacc); template <typename T> -void cell_avg_lib( - const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const T* avg_coeff, T* avg_psi_c, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const bool lacc); \ No newline at end of file +void cell_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const T* avg_coeff, T* avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const bool lacc); \ No newline at end of file -- GitLab From 011e68ae08b27c21d215a230e554a4ff30d35388 Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: 
Wed, 26 Feb 2025 15:24:39 +0100 Subject: [PATCH 17/36] index from 0 with statics in edges2cells --- src/interpolation/mo_lib_interpolation_scalar.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index a9b3f46..d5f3b8f 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -246,9 +246,9 @@ void edges2cells_scalar_lib( Kokkos::parallel_for("edges2cells_scalar_lib_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { p_cell_out_view(jc, jk, jb) = + coeff_int_view(jc, 0, jb)*p_edge_in_view(iidx_view(jc, jb, 0), jk, iblk_view(jc, jb, 0)) + coeff_int_view(jc, 1, jb)*p_edge_in_view(iidx_view(jc, jb, 1), jk, iblk_view(jc, jb, 1)) + - coeff_int_view(jc, 2, jb)*p_edge_in_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2)) + - coeff_int_view(jc, 3, jb)*p_edge_in_view(iidx_view(jc, jb, 3), jk, iblk_view(jc, jb, 3)); + coeff_int_view(jc, 2, jb)*p_edge_in_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2)); }); } -- GitLab From bf8338be6e3b5f6fd2f4f2ef515d9702bb2d1d49 Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: Wed, 26 Feb 2025 15:24:59 +0100 Subject: [PATCH 18/36] Update tests, about halfway mark --- test/c/test_interpolation_scalar.cpp | 332 +++++++++++++++++++++++---- 1 file changed, 293 insertions(+), 39 deletions(-) diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp index 9a47f76..78c615e 100644 --- a/test/c/test_interpolation_scalar.cpp +++ b/test/c/test_interpolation_scalar.cpp @@ -32,54 +32,58 @@ struct MixedPrecision { using out_type = OutT; }; -// Define a helper struct that holds the one type. -template<typename T> -struct FullPrecision { - using in_type = T; -}; - // Define the list of type pairs we want to test. 
typedef ::testing::Types< MixedPrecision<double, double>, MixedPrecision<double, float>, MixedPrecision<float, float> > MixedTypes; // Define the list of type pairs we want to test. -typedef ::testing::Types< FullPrecision<double>, - FullPrecision<float > > FullTypes; +typedef ::testing::Types< MixedPrecision<double, double>, + MixedPrecision<float, float > > OneTypes; -// Define a typed test fixture. -template <typename TypePair> -class Verts2edgesScalarLibFullTestFixture : public ::testing::Test { +// Shared dimensions for all routines and classes +class interp_dimensions { public: - using InType = typename TypePair::in_type; - using OutType = typename TypePair::in_type; - // Constant dimensions. static constexpr int nproma = 16; // inner loop length static constexpr int nlev = 7; // number of vertical levels + static constexpr int nblks_c = 2; // number of cell blocks static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) static constexpr int nblks_v = 2; // number of vertex blocks (for rbf arrays and outputs) - static constexpr int rbf_vec_dim = 6; // fixed dimension for rbf vector (stencil points) // Parameter values. - int i_startblk = 0; - int i_endblk = 1; // Test blocks [0, 1] - int i_startidx_in = 2; - int i_endidx_in = nproma - 3; // Partial range: 2 .. nproma-3 - int slev = 1; - int elev = nlev - 1; // Partial vertical range (1 .. nlev-1) - bool lacc = false; // Not using ACC-specific behavior. - bool acc_async = false; // No asynchronous execution. + const int i_startblk = 0; + const int i_endblk = 1; // Test blocks [0, 1] + const int i_startidx = 2; + const int i_endidx = nproma - 3; // Partial range: 2 .. nproma-3 + const int slev = 1; + const int elev = nlev - 1; // Partial vertical range (1 .. nlev-1) + const bool lacc = false; // Not using ACC-specific behavior. + const bool acc_async = false; // No asynchronous execution. +}; + +//////////////////////////////////////////////////////////////////////////////// +// +// ! 
verts2edges +// +//////////////////////////////////////////////////////////////////////////////// + +// Define a test fixture +template <typename Types> +class Verts2edgesScalarLibTestFixture : public testing::Test, public interp_dimensions { +public: + using InType = typename Types::in_type; + using OutType = typename Types::out_type; // Arrays stored in std::vector. std::vector<InType> p_vertex_in; // Dimensions: (nproma, nlev, nblks_v) std::vector<int> edge_vertex_idx; // Dimensions: (nproma, nblks_e, 4) std::vector<int> edge_vertex_blk; // Dimensions: (nproma, nblks_e, 4) std::vector<InType> coeff_int; // Dimensions: (nproma, 2, nblks_e) - std::vector<InType> p_edge_out; // Dimensions: (nproma, nlev, nblks_e) + std::vector<OutType> p_edge_out; // Dimensions: (nproma, nlev, nblks_e) - Verts2edgesScalarLibFullTestFixture() { + Verts2edgesScalarLibTestFixture() { // Allocate and initialize inputs. p_vertex_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_v), static_cast<InType>(1)); edge_vertex_idx.resize(num_elements_3d<int>(nproma, nblks_e, 4), 1); @@ -91,14 +95,13 @@ public: } }; -TYPED_TEST_SUITE(Verts2edgesScalarLibFullTestFixture, FullTypes); +// Add test suite +TYPED_TEST_SUITE(Verts2edgesScalarLibTestFixture, OneTypes); -TYPED_TEST(Verts2edgesScalarLibFullTestFixture, BasicTest) { +// Add test +TYPED_TEST(Verts2edgesScalarLibTestFixture, verts2edges) { using InType = typename TestFixture::InType; - using OutType = typename TestFixture::InType; - - // Copy output so it can be restored after - auto tmp_p_edge_out = this->p_edge_out; + using OutType = typename TestFixture::OutType; // Call the function verts2edges_scalar_lib<InType>( @@ -109,30 +112,281 @@ TYPED_TEST(Verts2edgesScalarLibFullTestFixture, BasicTest) { this->p_edge_out.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, - this->i_endidx_in, + this->i_startidx, + this->i_endidx, this->slev, this->elev, this->nproma, this->nlev, - Verts2edgesScalarLibFullTestFixture< TypeParam 
>::nblks_v, - Verts2edgesScalarLibFullTestFixture< TypeParam >::nblks_e, + this->nblks_v, + this->nblks_e, + this->lacc); + + // Check the outputs only for blocks in the range + // { [i_startblk, i_endblk], [slev,elev], [i_startidx, i_endidx] } + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev; level < this->elev; ++level) { + for (int i = this->i_startidx; i < this->i_endidx; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 2 stencil points, expect 2. + EXPECT_NEAR(this->p_edge_out[idx], static_cast<OutType>(2), static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + } + } + } +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// ! cells2edges +// +//////////////////////////////////////////////////////////////////////////////// + + + +// Define a typed test fixture with one precision of data +template <typename Types> +class Cells2edgesScalarLibTestFixture : public testing::Test, public interp_dimensions{ +public: + using InType = typename Types::in_type; + using OutType = typename Types::out_type; + + // Arrays stored in std::vector. 
+ std::vector<InType> p_cell_in; // Dimensions: (nproma, nlev, nblks_c) + std::vector<int> edge_cell_idx; // Dimensions: (nproma, nblks_e, 2) + std::vector<int> edge_cell_blk; // Dimensions: (nproma, nblks_e, 2) + std::vector<OutType> coeff_int; // Dimensions: (nproma, 2, nblks_e) + std::vector<OutType> p_edge_out; // Dimensions: (nproma, nlev, nblks_e) + + // TODO review these values + const int patch_id = 0; + const bool l_limited_area = false; + const bool lfill_latbc = false; + std::vector<int> i_startblk_in; // Dimensions: (2) + std::vector<int> i_endblk_in; // Dimensions: (2) + std::vector<int> i_startidx_in; // Dimensions: (2) + std::vector<int> i_endidx_in; // Dimensions: (2) + + Cells2edgesScalarLibTestFixture() { + // Allocate and initialize inputs. + p_cell_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_c), static_cast<InType>(1)); + edge_cell_idx.resize(num_elements_3d<int>(nproma, nblks_e, 2), 1); + edge_cell_blk.resize(num_elements_3d<int>(nproma, nblks_e, 2), 0); + coeff_int.resize(num_elements_3d<InType>(nproma, 2, nblks_e), static_cast<OutType>(1)); + + // Allocate output arrays and initialize to zero. 
+ p_edge_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_e), static_cast<OutType>(0)); + + // Allocate neighbour indexes + i_startblk_in.resize(2, i_startblk); + i_endblk_in.resize(2, i_endblk); + i_startidx_in.resize(2, i_startidx); + i_endidx_in.resize(2, i_endidx); + } +}; + +// Add test suite +TYPED_TEST_SUITE(Cells2edgesScalarLibTestFixture, OneTypes); + +// Add test +TYPED_TEST(Cells2edgesScalarLibTestFixture, cells2edges) { + using InType = typename TestFixture::InType; + using OutType = typename TestFixture::OutType; + + // Call the function + cells2edges_scalar_lib<InType>( + this->p_cell_in.data(), + this->edge_cell_idx.data(), + this->edge_cell_blk.data(), + this->coeff_int.data(), + this->p_edge_out.data(), + this->i_startblk_in.data(), + this->i_endblk_in.data(), + this->i_startidx_in.data(), + this->i_endidx_in.data(), + this->slev, + this->elev, + this->nproma, + this->nlev, + this->nblks_c, + this->nblks_e, + this->patch_id, + this->l_limited_area, + this->lfill_latbc, this->lacc); // Check the outputs only for blocks in the range // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } for (int block = this->i_startblk; block <= this->i_endblk; ++block) { for (int level = this->slev; level < this->elev; ++level) { - for (int i = this->i_startidx_in; i < this->i_endidx_in; ++i) { + for (int i = this->i_startidx; i < this->i_endidx; ++i) { // Compute the linear index for a 3D array in column-major order: size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 6 stencil points, expect 6. + // Since every contribution is 1 and there are 2 stencil points, expect 2. EXPECT_NEAR(this->p_edge_out[idx], static_cast<OutType>(2), static_cast<OutType>(1e-5)) << "Failure at block " << block << ", level " << level << ", index " << i; } } } +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// ! 
edges2verts +// +//////////////////////////////////////////////////////////////////////////////// - // Restore output - this->p_edge_out = tmp_p_edge_out; +// Define a typed test fixture with one precision of data +template <typename Types> +class Edges2vertsScalarLibTestFixture : public testing::Test, public interp_dimensions{ +public: + using InType = typename Types::in_type; + using OutType = typename Types::in_type; + + // Arrays stored in std::vector. + std::vector<InType> p_edge_in; // Dimensions: (nproma, nlev, nblks_e) + std::vector<int> edge_vert_idx; // Dimensions: (nproma, nblks_e, 6) + std::vector<int> edge_vert_blk; // Dimensions: (nproma, nblks_e, 6) + std::vector<OutType> v_int; // Dimensions: (nproma, 6-cell_type, nblks_v) + std::vector<OutType> p_vert_out; // Dimensions: (nproma, nlev, nblks_v) + + const int cell_type = 6; + + Edges2vertsScalarLibTestFixture() { + // Allocate and initialize inputs. + p_edge_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1)); + edge_vert_idx.resize(num_elements_3d<int>(nproma, nblks_e, 6), 1); + edge_vert_blk.resize(num_elements_3d<int>(nproma, nblks_e, 6), 0); + v_int.resize(num_elements_3d<InType>(nproma, 6, nblks_v), static_cast<OutType>(1)); + + // Allocate output arrays and initialize to zero. 
+ p_vert_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); + } +}; + +// Add test suite +TYPED_TEST_SUITE(Edges2vertsScalarLibTestFixture, OneTypes); + +// Add test +TYPED_TEST(Edges2vertsScalarLibTestFixture, cells2edges) { + using InType = typename TestFixture::InType; + using OutType = typename TestFixture::OutType; + + // Call the function + edges2verts_scalar_lib<InType>( + this->p_edge_in.data(), + this->edge_vert_idx.data(), + this->edge_vert_blk.data(), + this->v_int.data(), + this->p_vert_out.data(), + this->i_startblk, + this->i_endblk, + this->i_startidx, + this->i_endidx, + this->slev, + this->elev, + this->nproma, + this->nlev, + this->nblks_e, + this->nblks_v, + this->cell_type, + this->lacc); + + // Check the outputs only for blocks in the range + // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev; level < this->elev; ++level) { + for (int i = this->i_startidx; i < this->i_endidx; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 6 stencil points, expect 6. + EXPECT_NEAR(this->p_vert_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + } + } + } +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// ! edges2cells +// +//////////////////////////////////////////////////////////////////////////////// + + +// Define a typed test fixture with one precision of data +template <typename Types> +class Edges2cellsScalarLibTestFixture : public testing::Test, public interp_dimensions{ +public: + using InType = typename Types::in_type; + using OutType = typename Types::in_type; + + // Arrays stored in std::vector. 
+ std::vector<InType> p_edge_in; // Dimensions: (nproma, nlev, nblks_e) + std::vector<int> edge_idx; // Dimensions: (nproma, nblks_c, 3) + std::vector<int> edge_blk; // Dimensions: (nproma, nblks_c, 3) + std::vector<OutType> coeff_int; // Dimensions: (nproma, 3-cell_type, nblks_c) + std::vector<OutType> p_cell_out; // Dimensions: (nproma, nlev, nblks_c) + + const int cell_type = 3; + + Edges2cellsScalarLibTestFixture() { + // Allocate and initialize inputs. + p_edge_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1)); + edge_idx.resize(num_elements_3d<int>(nproma, nblks_c, cell_type), 1); + edge_blk.resize(num_elements_3d<int>(nproma, nblks_c, cell_type), 0); + coeff_int.resize(num_elements_3d<InType>(nproma, cell_type, nblks_c), static_cast<OutType>(1)); + + // Allocate output arrays and initialize to zero. + p_cell_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_c), static_cast<OutType>(0)); + } +}; + +// Add test suite +TYPED_TEST_SUITE(Edges2cellsScalarLibTestFixture, OneTypes); + +// Add test +TYPED_TEST(Edges2cellsScalarLibTestFixture, cells2edges) { + using InType = typename TestFixture::InType; + using OutType = typename TestFixture::OutType; + + // Call the function + edges2cells_scalar_lib<InType>( + this->p_edge_in.data(), + this->edge_idx.data(), + this->edge_blk.data(), + this->coeff_int.data(), + this->p_cell_out.data(), + this->i_startblk, + this->i_endblk, + this->i_startidx, + this->i_endidx, + this->slev, + this->elev, + this->nproma, + this->nlev, + this->nblks_e, + this->nblks_c, + this->lacc); + + // Check the outputs only for blocks in the range + // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev; level < this->elev; ++level) { + for (int i = this->i_startidx; i < this->i_endidx; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = i + level * 
this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 3 stencil points, expect 3. + EXPECT_NEAR(this->p_cell_out[idx], static_cast<OutType>(3), static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + } + } + } } + +// TODO: +// cells2verts, cells2verts ri, verts2cells, cell_avg_lib \ No newline at end of file -- GitLab From e7527f1e94049fa38231332de8575d93469134fb Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: Wed, 26 Feb 2025 17:56:00 +0100 Subject: [PATCH 19/36] Fix bounds nproma in verts2cells --- src/interpolation/mo_lib_interpolation_scalar.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index d5f3b8f..9529125 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -404,9 +404,11 @@ void verts2cells_scalar_lib( // vertex based scalar output field, dim: (nproma,nlev,nblks_c) UnmanagedT3D p_cell_out_view(p_cell_out, nproma, nlev, nblks_c); - for (int jb = 1; jb<nblks_c + 1; ++jb){ + for (int jb = 0; jb<nblks_c; ++jb){ - int nlen = (jb != nblks_c ? 
nproma: npromz_c); + int nlen; + if (jb != nblks_c){ nlen = nproma; } + else { nlen = npromz_c} Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( {slev, 0}, {elev + 1, nlen + 1}); -- GitLab From e0a23eac5464b78bd693581646907f0cf797d78d Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: Wed, 26 Feb 2025 17:57:46 +0100 Subject: [PATCH 20/36] All tests for mo_lib_interpolation_scalar --- test/c/test_interpolation_scalar.cpp | 316 ++++++++++++++++++++++++++- 1 file changed, 311 insertions(+), 5 deletions(-) diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp index 78c615e..c4e414d 100644 --- a/test/c/test_interpolation_scalar.cpp +++ b/test/c/test_interpolation_scalar.cpp @@ -188,7 +188,7 @@ public: }; // Add test suite -TYPED_TEST_SUITE(Cells2edgesScalarLibTestFixture, OneTypes); +TYPED_TEST_SUITE(Cells2edgesScalarLibTestFixture, MixedTypes); // Add test TYPED_TEST(Cells2edgesScalarLibTestFixture, cells2edges) { @@ -244,7 +244,7 @@ template <typename Types> class Edges2vertsScalarLibTestFixture : public testing::Test, public interp_dimensions{ public: using InType = typename Types::in_type; - using OutType = typename Types::in_type; + using OutType = typename Types::out_type; // Arrays stored in std::vector. std::vector<InType> p_edge_in; // Dimensions: (nproma, nlev, nblks_e) @@ -323,7 +323,7 @@ template <typename Types> class Edges2cellsScalarLibTestFixture : public testing::Test, public interp_dimensions{ public: using InType = typename Types::in_type; - using OutType = typename Types::in_type; + using OutType = typename Types::out_type; // Arrays stored in std::vector. std::vector<InType> p_edge_in; // Dimensions: (nproma, nlev, nblks_e) @@ -388,5 +388,311 @@ TYPED_TEST(Edges2cellsScalarLibTestFixture, cells2edges) { } } -// TODO: -// cells2verts, cells2verts ri, verts2cells, cell_avg_lib \ No newline at end of file +//////////////////////////////////////////////////////////////////////////////// +// +// ! 
cells2verts +// +//////////////////////////////////////////////////////////////////////////////// + + +// Define a typed test fixture with one precision of data +template <typename Types> +class Cells2vertsScalarLibTestFixture : public testing::Test, public interp_dimensions{ +public: + using InType = typename Types::in_type; + using OutType = typename Types::out_type; + + // Arrays stored in std::vector. + std::vector<InType> p_cell_in; // Dimensions: (nproma, nlev, nblks_c) + std::vector<int> vert_cell_idx; // Dimensions: (nproma, nblks_v, 6) + std::vector<int> vert_cell_blk; // Dimensions: (nproma, nblks_v, 6) + std::vector<OutType> coeff_int; // Dimensions: (nproma, 9-cell_type, nblks_v) + std::vector<OutType> p_vert_out; // Dimensions: (nproma, nlev, nblks_v) + + const int cell_type = 9; + + Cells2vertsScalarLibTestFixture() { + // Allocate and initialize inputs. + p_cell_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_c), static_cast<InType>(1)); + vert_cell_idx.resize(num_elements_3d<int>(nproma, nblks_v, 6), 1); + vert_cell_blk.resize(num_elements_3d<int>(nproma, nblks_v, 6), 0); + coeff_int.resize(num_elements_3d<InType>(nproma, cell_type, nblks_v), static_cast<OutType>(1)); + + // Allocate output arrays and initialize to zero. 
+ p_vert_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); + } +}; + +// Add test suite +TYPED_TEST_SUITE(Cells2vertsScalarLibTestFixture, MixedTypes); + +// Add test +TYPED_TEST(Cells2vertsScalarLibTestFixture, cells2edges) { + using InType = typename TestFixture::InType; + using OutType = typename TestFixture::OutType; + + // Call the function + cells2verts_scalar_lib<InType,OutType>( + this->p_cell_in.data(), + this->vert_cell_idx.data(), + this->vert_cell_blk.data(), + this->coeff_int.data(), + this->p_vert_out.data(), + this->i_startblk, + this->i_endblk, + this->i_startidx, + this->i_endidx, + this->slev, + this->elev, + this->nproma, + this->nlev, + this->nblks_c, + this->nblks_v, + this->lacc, + this->acc_async); + + // Check the outputs only for blocks in the range + // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev; level < this->elev; ++level) { + for (int i = this->i_startidx; i < this->i_endidx; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 6 stencil points, expect 6. + EXPECT_NEAR(this->p_vert_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +// +// ! 
cells2verts ri +// +//////////////////////////////////////////////////////////////////////////////// + + +// Define a typed test fixture with one precision of data +template <typename Types> +class Cells2vertsriScalarLibTestFixture : public testing::Test, public interp_dimensions{ +public: + using InType = typename Types::in_type; + using OutType = typename Types::out_type; + + // Arrays stored in std::vector. + std::vector<InType> p_cell_in; // Dimensions: (nproma, nlev, nblks_c) + std::vector<int> vert_cell_idx; // Dimensions: (nproma, nblks_v, 6) + std::vector<int> vert_cell_blk; // Dimensions: (nproma, nblks_v, 6) + std::vector<InType> coeff_int; // Dimensions: (nproma, 9-cell_type, nblks_v) + std::vector<OutType> p_vert_out; // Dimensions: (nproma, nlev, nblks_v) + + const int cell_type = 9; + + Cells2vertsriScalarLibTestFixture() { + // Allocate and initialize inputs. + p_cell_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_c), static_cast<InType>(1)); + vert_cell_idx.resize(num_elements_3d<int>(nproma, nblks_v, 6), 1); + vert_cell_blk.resize(num_elements_3d<int>(nproma, nblks_v, 6), 0); + coeff_int.resize(num_elements_3d<InType>(nproma, cell_type, nblks_v), static_cast<InType>(1)); + + // Allocate output arrays and initialize to zero. 
+ p_vert_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); + } +}; + +// Add test suite +TYPED_TEST_SUITE(Cells2vertsriScalarLibTestFixture, MixedTypes); + +// Add test +TYPED_TEST(Cells2vertsriScalarLibTestFixture, cells2edges) { + using InType = typename TestFixture::InType; + using OutType = typename TestFixture::OutType; + + // Call the function + cells2verts_scalar_ri_lib<InType,OutType>( + this->p_cell_in.data(), + this->vert_cell_idx.data(), + this->vert_cell_blk.data(), + this->coeff_int.data(), + this->p_vert_out.data(), + this->i_startblk, + this->i_endblk, + this->i_startidx, + this->i_endidx, + this->slev, + this->elev, + this->nproma, + this->nlev, + this->nblks_c, + this->nblks_v, + this->lacc, + this->acc_async); + + // Check the outputs only for blocks in the range + // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev; level < this->elev; ++level) { + for (int i = this->i_startidx; i < this->i_endidx; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 6 stencil points, expect 6. + EXPECT_NEAR(this->p_vert_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + } + } + } +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// ! 
verts2cells +// +//////////////////////////////////////////////////////////////////////////////// + + +// Define a typed test fixture with one precision of data +template <typename Types> +class Verts2cellsScalarLibTestFixture : public testing::Test, public interp_dimensions{ +public: + using InType = typename Types::in_type; + using OutType = typename Types::out_type; + + // Arrays stored in std::vector. + std::vector<InType> p_vert_in; // Dimensions: (nproma, nlev, nblks_v) + std::vector<int> cell_index_idx; // Dimensions: (nproma, nblks_c, 3) + std::vector<int> cell_index_blk; // Dimensions: (nproma, nblks_c, 3) + std::vector<InType> coeff_int; // Dimensions: (nproma, 3-cell_type, nblks_c) + std::vector<OutType> p_cell_out; // Dimensions: (nproma, nlev, nblks_c) + + const int cell_type = 3; + const int npromz_c = 32; + + Verts2cellsScalarLibTestFixture() { + // Allocate and initialize inputs. + p_vert_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_v), static_cast<InType>(1)); + cell_index_idx.resize(num_elements_3d<int>(nproma, nblks_c, 3), 1); + cell_index_blk.resize(num_elements_3d<int>(nproma, nblks_c, 3), 0); + coeff_int.resize(num_elements_3d<InType>(nproma, cell_type, nblks_c), static_cast<InType>(1)); + + // Allocate output arrays and initialize to zero. 
+ p_cell_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_c), static_cast<OutType>(0)); + } +}; + +// Add test suite +TYPED_TEST_SUITE(Verts2cellsScalarLibTestFixture, OneTypes); + +// Add test +TYPED_TEST(Verts2cellsScalarLibTestFixture, cells2edges) { + using InType = typename TestFixture::InType; + using OutType = typename TestFixture::OutType; + + // Call the function + verts2cells_scalar_lib<InType>( + this->p_vert_in.data(), + this->cell_index_idx.data(), + this->cell_index_blk.data(), + this->coeff_int.data(), + this->p_cell_out.data(), + this->nblks_c, + this->npromz_c, + this->slev, + this->elev, + this->nproma, + this->nlev, + this->nblks_v, + this->lacc); + + + // Check the outputs only for blocks in the range + // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev; level < this->elev; ++level) { + for (int i = this->i_startidx; i < this->i_endidx; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 3 stencil points, expect 3. + EXPECT_NEAR(this->p_cell_out[idx], static_cast<OutType>(3), static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + } + } + } +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// ! cell_avg +// +//////////////////////////////////////////////////////////////////////////////// + + +// Define a typed test fixture with one precision of data +template <typename Types> +class CellAvgScalarLibTestFixture : public testing::Test, public interp_dimensions{ +public: + using InType = typename Types::in_type; + using OutType = typename Types::out_type; + + // Arrays stored in std::vector. 
+ std::vector<InType> psi_c; // Dimensions: (nproma, nlev, nblks_c) + std::vector<int> cell_neighbor_idx; // Dimensions: (nproma, nblks_c, 3) + std::vector<int> cell_neighbor_blk; // Dimensions: (nproma, nblks_c, 3) + std::vector<InType> avg_coeff; // Dimensions: (nproma, nlev, nblks_c) + std::vector<OutType> avg_psi_c; // Dimensions: (nproma, nlev, nblks_c) + + CellAvgScalarLibTestFixture() { + // Allocate and initialize inputs. + psi_c.resize(num_elements_3d<InType>(nproma, nlev, nblks_c), static_cast<InType>(1)); + cell_neighbor_idx.resize(num_elements_3d<int>(nproma, nblks_c, 3), 1); + cell_neighbor_blk.resize(num_elements_3d<int>(nproma, nblks_c, 3), 0); + avg_coeff.resize(num_elements_3d<InType>(nproma, nlev, nblks_c), static_cast<InType>(1)); + + // Allocate output arrays and initialize to zero. + avg_psi_c.resize(num_elements_3d<OutType>(nproma, nlev, nblks_c), static_cast<OutType>(0)); + } +}; + +// Add test suite +TYPED_TEST_SUITE(CellAvgScalarLibTestFixture, OneTypes); + +// Add test +TYPED_TEST(CellAvgScalarLibTestFixture, cells2edges) { + using InType = typename TestFixture::InType; + using OutType = typename TestFixture::OutType; + + // Call the function + cell_avg_lib<InType>( + this->psi_c.data(), + this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), + this->avg_coeff.data(), + this->avg_psi_c.data(), + this->i_startblk, + this->i_endblk, + this->i_startidx, + this->i_endidx, + this->slev, + this->elev, + this->nproma, + this->nlev, + this->nblks_c, + this->lacc); + + + // Check the outputs only for blocks in the range + // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev; level < this->elev; ++level) { + for (int i = this->i_startidx; i < this->i_endidx; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; + // 
Since every contribution is 1 and there are 4 stencil points, expect 4. + EXPECT_NEAR(this->avg_psi_c[idx], static_cast<OutType>(4), static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + } + } + } +} -- GitLab From a051e487a1f105dc2679375688a83a766c6ec8a3 Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: Wed, 26 Feb 2025 17:59:32 +0100 Subject: [PATCH 21/36] restoring rbf_vec_interpol_vertex_lib.cpp --- .../mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp index 0a41fe8..c9b776e 100644 --- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp @@ -101,7 +101,7 @@ void rbf_vec_interpol_vertex_lib( int i_startidx; // start index int i_endidx; // end index - for (jb=i_startblk; jb < i_endblk + 1; ++jb){ + for (jb=i_startblk; jb <= i_endblk; ++jb){ get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); -- GitLab From 7fbfbe9f382d22fa91f8b78b9909d9059993d9db Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: Wed, 26 Feb 2025 17:34:21 +0000 Subject: [PATCH 22/36] Typo mo_lib_interpolation_scalar.cpp --- src/interpolation/mo_lib_interpolation_scalar.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index 9529125..9306728 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -408,7 +408,7 @@ void verts2cells_scalar_lib( int nlen; if (jb != nblks_c){ nlen = nproma; } - else { nlen = npromz_c} + else { nlen = npromz_c; } Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( 
{slev, 0}, {elev + 1, nlen + 1}); -- GitLab From 53be884790df4f6af1d0e745ea19010058903a7f Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: Thu, 27 Feb 2025 10:49:32 +0100 Subject: [PATCH 23/36] Fixing mixed-precision cells2edges, and loop bounds in verts2cells --- src/interpolation/interpolation_bindings.cpp | 2 +- .../mo_lib_interpolation_scalar.cpp | 18 +++++++++--------- .../mo_lib_interpolation_scalar.hpp | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp index 4476035..f986447 100644 --- a/src/interpolation/interpolation_bindings.cpp +++ b/src/interpolation/interpolation_bindings.cpp @@ -127,7 +127,7 @@ void cells2edges_scalar_lib_sp2dp(const float* p_cell_in, const int* edge_cell_i const int nblk_c, const int nblks_e, const int patch_id, const bool l_limited_area, const bool lfill_latbc, const bool lacc){ - cells2edges_scalar_lib<double, float>(p_cell_in,edge_cell_idx,edge_cell_blk,coeff_int,p_edge_out,i_startblk_in, + cells2edges_scalar_lib<float, double>(p_cell_in,edge_cell_idx,edge_cell_blk,coeff_int,p_edge_out,i_startblk_in, i_endblk_in,i_startidx_in,i_endidx_in,slev,elev,nproma,nlev, nblk_c,nblks_e,patch_id,l_limited_area,lfill_latbc,lacc); diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index 9306728..c59f928 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -75,8 +75,8 @@ void verts2edges_scalar_lib(const T* p_vertex_in, /// velocity points. 
/// template <typename T, typename S> -void cells2edges_scalar_lib(const S* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const T* coeff_int, T* p_edge_out, const int* i_startblk_in, +void cells2edges_scalar_lib(const T* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const S* coeff_int, S* p_edge_out, const int* i_startblk_in, const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblk_c, const int nblks_e, const int patch_id, @@ -85,14 +85,14 @@ void cells2edges_scalar_lib(const S* p_cell_in, const int* edge_cell_idx, const // Wrap raw pointers in unmanaged Kokkos Views. typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; typedef Kokkos::View<const S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstS3D; - typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D; typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - UnmanagedConstS3D p_cell_in_view (p_cell_in, nproma, nlev, nblk_c); + UnmanagedConstT3D p_cell_in_view (p_cell_in, nproma, nlev, nblk_c); UnmanagedConstInt3D iidx_view (edge_cell_idx, nproma, nblks_e, 2); UnmanagedConstInt3D iblk_view (edge_cell_blk, nproma, nblks_e, 2); - UnmanagedConstT3D coeff_int_view (coeff_int, nproma, 2, nblks_e); - UnmanagedT3D p_edge_out_view (p_edge_out, nproma, nlev, nblks_e); + UnmanagedConstS3D coeff_int_view (coeff_int, nproma, 2, nblks_e); + UnmanagedS3D p_edge_out_view (p_edge_out, nproma, nlev, nblks_e); //Fill outermost nest boundary int i_startblk, i_endblk; @@ -411,7 +411,7 @@ void verts2cells_scalar_lib( else { nlen = npromz_c; } Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, 0}, {elev + 1, nlen + 1}); + {slev, 0}, {elev + 1, nlen}); 
Kokkos::parallel_for("cell_avg_lib_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { @@ -524,8 +524,8 @@ void cells2edges_scalar_lib<float, float>(const float* p_cell_in, const int* edg const int nblk_c, const int nblks_e, const int patch_id, const bool l_limited_area, const bool lfill_latbc, const bool lacc); -template -void cells2edges_scalar_lib<double, float>(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, +template // sp2dp +void cells2edges_scalar_lib<float, double>(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, const double* coeff_int, double* p_edge_out, const int* i_startblk_in, const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, diff --git a/src/interpolation/mo_lib_interpolation_scalar.hpp b/src/interpolation/mo_lib_interpolation_scalar.hpp index 95febf1..16223ec 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.hpp +++ b/src/interpolation/mo_lib_interpolation_scalar.hpp @@ -22,8 +22,8 @@ void verts2edges_scalar_lib(const T* p_vertex_in, ; template <typename T, typename S> -void cells2edges_scalar_lib(const S* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const T* coeff_int, T* p_edge_out, const int* i_startblk_in, +void cells2edges_scalar_lib(const T* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const S* coeff_int, S* p_edge_out, const int* i_startblk_in, const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblk_c, const int nblks_e, const int patch_id, -- GitLab From e253335c2b3720813d5e0f6398707fb771e12535 Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: Thu, 27 Feb 2025 10:50:29 +0100 Subject: [PATCH 24/36] mixed precision sp2dp in test_interpolation_scalar --- test/c/test_interpolation_scalar.cpp | 8 ++++++-- 1 file changed, 6 
insertions(+), 2 deletions(-) diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp index c4e414d..ac2eddc 100644 --- a/test/c/test_interpolation_scalar.cpp +++ b/test/c/test_interpolation_scalar.cpp @@ -37,6 +37,10 @@ typedef ::testing::Types< MixedPrecision<double, double>, MixedPrecision<double, float>, MixedPrecision<float, float> > MixedTypes; +typedef ::testing::Types< MixedPrecision<double, double>, + MixedPrecision<float, double>, + MixedPrecision<float, float> > MixedTypesSP2DP; + // Define the list of type pairs we want to test. typedef ::testing::Types< MixedPrecision<double, double>, MixedPrecision<float, float > > OneTypes; @@ -188,7 +192,7 @@ public: }; // Add test suite -TYPED_TEST_SUITE(Cells2edgesScalarLibTestFixture, MixedTypes); +TYPED_TEST_SUITE(Cells2edgesScalarLibTestFixture, MixedTypesSP2DP); // Add test TYPED_TEST(Cells2edgesScalarLibTestFixture, cells2edges) { @@ -196,7 +200,7 @@ TYPED_TEST(Cells2edgesScalarLibTestFixture, cells2edges) { using OutType = typename TestFixture::OutType; // Call the function - cells2edges_scalar_lib<InType>( + cells2edges_scalar_lib<InType,OutType>( this->p_cell_in.data(), this->edge_cell_idx.data(), this->edge_cell_blk.data(), -- GitLab From 03f3b980be7cff8bca90c6233c6bcc46a3b24050 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Thu, 27 Feb 2025 22:42:07 +0100 Subject: [PATCH 25/36] worked on two of the test cases --- test/c/test_interpolation_scalar.cpp | 187 ++++++++++++--------------- 1 file changed, 81 insertions(+), 106 deletions(-) diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp index ac2eddc..f46c2b4 100644 --- a/test/c/test_interpolation_scalar.cpp +++ b/test/c/test_interpolation_scalar.cpp @@ -67,48 +67,58 @@ public: const bool acc_async = false; // No asynchronous execution. }; -//////////////////////////////////////////////////////////////////////////////// -// -// ! 
verts2edges -// -//////////////////////////////////////////////////////////////////////////////// - -// Define a test fixture -template <typename Types> -class Verts2edgesScalarLibTestFixture : public testing::Test, public interp_dimensions { +template <typename T> +class InterpolationScalarTypedTestFixture : public ::testing::Test, public interp_dimensions { public: - using InType = typename Types::in_type; - using OutType = typename Types::out_type; - // Arrays stored in std::vector. - std::vector<InType> p_vertex_in; // Dimensions: (nproma, nlev, nblks_v) + // Arrays used for verts2edges + std::vector<T> p_vertex_in; // Dimensions: (nproma, nlev, nblks_v) std::vector<int> edge_vertex_idx; // Dimensions: (nproma, nblks_e, 4) std::vector<int> edge_vertex_blk; // Dimensions: (nproma, nblks_e, 4) - std::vector<InType> coeff_int; // Dimensions: (nproma, 2, nblks_e) - std::vector<OutType> p_edge_out; // Dimensions: (nproma, nlev, nblks_e) + std::vector<T> coeff_int; // Dimensions: (nproma, 2, nblks_e) + std::vector<T> p_edge_out; // Dimensions: (nproma, nlev, nblks_e) - Verts2edgesScalarLibTestFixture() { - // Allocate and initialize inputs. 
- p_vertex_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_v), static_cast<InType>(1)); + // Arrays used for edges2verts + std::vector<T> p_edge_in; // Dimensions: (nproma, nlev, nblks_e) + std::vector<int> edge_vert_idx; // Dimensions: (nproma, nblks_e, 6) + std::vector<int> edge_vert_blk; // Dimensions: (nproma, nblks_e, 6) + std::vector<T> v_int; // Dimensions: (nproma, 6-cell_type, nblks_v) + std::vector<T> p_vert_out; // Dimensions: (nproma, nlev, nblks_v) + // + const int cell_type = 6; + + InterpolationScalarTypedTestFixture() { + // Allocate and initialize arrays needed for verts2edges + p_vertex_in.resize(num_elements_3d<T>(nproma, nlev, nblks_v), static_cast<T>(1)); edge_vertex_idx.resize(num_elements_3d<int>(nproma, nblks_e, 4), 1); edge_vertex_blk.resize(num_elements_3d<int>(nproma, nblks_e, 4), 0); - coeff_int.resize(num_elements_3d<InType>(nproma, 2, nblks_v), static_cast<InType>(1)); + coeff_int.resize(num_elements_3d<T>(nproma, 2, nblks_v), static_cast<T>(1)); - // Allocate output arrays and initialize to zero. 
- p_edge_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_e), static_cast<OutType>(0)); + p_edge_out.resize(num_elements_3d<T>(nproma, nlev, nblks_e), static_cast<T>(0)); + + // Allocate & Initialize arrays needed for edges2verts + p_edge_in.resize(num_elements_3d<T>(nproma, nlev, nblks_e), static_cast<T>(1)); + edge_vert_idx.resize(num_elements_3d<int>(nproma, nblks_e, 6), 1); + edge_vert_blk.resize(num_elements_3d<int>(nproma, nblks_e, 6), 0); + v_int.resize(num_elements_3d<T>(nproma, 6, nblks_v), static_cast<T>(1)); + + p_vert_out.resize(num_elements_3d<T>(nproma, nlev, nblks_v), static_cast<T>(0)); } }; -// Add test suite -TYPED_TEST_SUITE(Verts2edgesScalarLibTestFixture, OneTypes); +typedef ::testing::Types<float, double> SingleType; -// Add test -TYPED_TEST(Verts2edgesScalarLibTestFixture, verts2edges) { - using InType = typename TestFixture::InType; - using OutType = typename TestFixture::OutType; +TYPED_TEST_SUITE(InterpolationScalarTypedTestFixture, SingleType); - // Call the function - verts2edges_scalar_lib<InType>( +//////////////////////////////////////////////////////////////////////////////// +// +// ! verts2edges +// +//////////////////////////////////////////////////////////////////////////////// + +TYPED_TEST(InterpolationScalarTypedTestFixture, Verts2Edges) { + + verts2edges_scalar_lib<TypeParam>( this->p_vertex_in.data(), this->edge_vertex_idx.data(), this->edge_vertex_blk.data(), @@ -134,13 +144,54 @@ TYPED_TEST(Verts2edgesScalarLibTestFixture, verts2edges) { // Compute the linear index for a 3D array in column-major order: size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; // Since every contribution is 1 and there are 2 stencil points, expect 2. 
- EXPECT_NEAR(this->p_edge_out[idx], static_cast<OutType>(2), static_cast<OutType>(1e-5)) + EXPECT_NEAR(this->p_edge_out[idx], static_cast<TypeParam>(2), static_cast<TypeParam>(1e-5)) << "Failure at block " << block << ", level " << level << ", index " << i; } } } } +//////////////////////////////////////////////////////////////////////////////// +// +// ! edges2verts +// +//////////////////////////////////////////////////////////////////////////////// + +TYPED_TEST(InterpolationScalarTypedTestFixture, Edges2Verts) { + + edges2verts_scalar_lib<TypeParam>( + this->p_edge_in.data(), + this->edge_vert_idx.data(), + this->edge_vert_blk.data(), + this->v_int.data(), + this->p_vert_out.data(), + this->i_startblk, + this->i_endblk, + this->i_startidx, + this->i_endidx, + this->slev, + this->elev, + this->nproma, + this->nlev, + this->nblks_e, + this->nblks_v, + this->cell_type, + this->lacc); + + // Check the outputs only for blocks in the range + // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev; level < this->elev; ++level) { + for (int i = this->i_startidx; i < this->i_endidx; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 6 stencil points, expect 6. + EXPECT_NEAR(this->p_vert_out[idx], static_cast<TypeParam>(6), static_cast<TypeParam>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + } + } + } +} //////////////////////////////////////////////////////////////////////////////// // @@ -237,82 +288,6 @@ TYPED_TEST(Cells2edgesScalarLibTestFixture, cells2edges) { } -//////////////////////////////////////////////////////////////////////////////// -// -// ! 
edges2verts -// -//////////////////////////////////////////////////////////////////////////////// - -// Define a typed test fixture with one precision of data -template <typename Types> -class Edges2vertsScalarLibTestFixture : public testing::Test, public interp_dimensions{ -public: - using InType = typename Types::in_type; - using OutType = typename Types::out_type; - - // Arrays stored in std::vector. - std::vector<InType> p_edge_in; // Dimensions: (nproma, nlev, nblks_e) - std::vector<int> edge_vert_idx; // Dimensions: (nproma, nblks_e, 6) - std::vector<int> edge_vert_blk; // Dimensions: (nproma, nblks_e, 6) - std::vector<OutType> v_int; // Dimensions: (nproma, 6-cell_type, nblks_v) - std::vector<OutType> p_vert_out; // Dimensions: (nproma, nlev, nblks_v) - - const int cell_type = 6; - - Edges2vertsScalarLibTestFixture() { - // Allocate and initialize inputs. - p_edge_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1)); - edge_vert_idx.resize(num_elements_3d<int>(nproma, nblks_e, 6), 1); - edge_vert_blk.resize(num_elements_3d<int>(nproma, nblks_e, 6), 0); - v_int.resize(num_elements_3d<InType>(nproma, 6, nblks_v), static_cast<OutType>(1)); - - // Allocate output arrays and initialize to zero. 
- p_vert_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); - } -}; - -// Add test suite -TYPED_TEST_SUITE(Edges2vertsScalarLibTestFixture, OneTypes); - -// Add test -TYPED_TEST(Edges2vertsScalarLibTestFixture, cells2edges) { - using InType = typename TestFixture::InType; - using OutType = typename TestFixture::OutType; - - // Call the function - edges2verts_scalar_lib<InType>( - this->p_edge_in.data(), - this->edge_vert_idx.data(), - this->edge_vert_blk.data(), - this->v_int.data(), - this->p_vert_out.data(), - this->i_startblk, - this->i_endblk, - this->i_startidx, - this->i_endidx, - this->slev, - this->elev, - this->nproma, - this->nlev, - this->nblks_e, - this->nblks_v, - this->cell_type, - this->lacc); - - // Check the outputs only for blocks in the range - // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } - for (int block = this->i_startblk; block <= this->i_endblk; ++block) { - for (int level = this->slev; level < this->elev; ++level) { - for (int i = this->i_startidx; i < this->i_endidx; ++i) { - // Compute the linear index for a 3D array in column-major order: - size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 6 stencil points, expect 6. 
- EXPECT_NEAR(this->p_vert_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " << i; - } - } - } -} //////////////////////////////////////////////////////////////////////////////// -- GitLab From 21af82431c2db969beb0ecb4e36043966a9df47c Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 4 Mar 2025 16:48:15 +0100 Subject: [PATCH 26/36] removed cell_type as a function argument --- src/interpolation/interpolation_bindings.cpp | 8 ++++---- src/interpolation/interpolation_bindings.h | 6 +++--- src/interpolation/mo_lib_interpolation_scalar.cpp | 8 ++++---- src/interpolation/mo_lib_interpolation_scalar.hpp | 4 ++-- test/c/test_interpolation_scalar.cpp | 1 - 5 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp index f986447..0535091 100644 --- a/src/interpolation/interpolation_bindings.cpp +++ b/src/interpolation/interpolation_bindings.cpp @@ -137,20 +137,20 @@ void edges2verts_scalar_lib_dp (const double* p_edge_in, const int* vert_edge_id const double* v_int, double* p_vert_out, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, - const int nblks_v, const int cell_type, const bool lacc){ + const int nblks_v, const bool lacc){ edges2verts_scalar_lib<double>(p_edge_in, vert_edge_idx, vert_edge_blk, v_int,p_vert_out,i_startblk, - i_endblk,i_startidx_in,i_endidx_in, slev, elev, nproma, nlev,nblks_e, nblks_v, cell_type,lacc); + i_endblk,i_startidx_in,i_endidx_in, slev, elev, nproma, nlev,nblks_e, nblks_v, lacc); } void edges2verts_scalar_lib_sp(const float* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, const float* v_int, float* p_vert_out, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, 
const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, - const int nblks_v, const int cell_type, const bool lacc){ + const int nblks_v, const bool lacc){ edges2verts_scalar_lib<float>(p_edge_in, vert_edge_idx, vert_edge_blk, v_int,p_vert_out,i_startblk, - i_endblk,i_startidx_in,i_endidx_in, slev, elev, nproma, nlev,nblks_e, nblks_v, cell_type,lacc); + i_endblk,i_startidx_in,i_endidx_in, slev, elev, nproma, nlev,nblks_e, nblks_v, lacc); } diff --git a/src/interpolation/interpolation_bindings.h b/src/interpolation/interpolation_bindings.h index 123c487..14c1f34 100644 --- a/src/interpolation/interpolation_bindings.h +++ b/src/interpolation/interpolation_bindings.h @@ -78,14 +78,14 @@ extern "C"{ const double* v_int, double* p_vert_out, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, - const int nblks_v, const int cell_type, const bool lacc); + const int nblks_v, const bool lacc); void edges2verts_scalar_lib_sp (const float* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, const float* v_int, float* p_vert_out, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, - const int nblks_v, const int cell_type, const bool lacc); + const int nblks_v, const bool lacc); void edges2cells_scalar_lib_dp( const double* p_edge_in, const int* edge_idx, const int* edge_blk, @@ -183,4 +183,4 @@ extern "C"{ const int nlev, const int nblks_c, const bool lacc); -} \ No newline at end of file +} diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index c59f928..07403df 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -162,7 +162,7 @@ void edges2verts_scalar_lib (const 
T* p_edge_in, const int* vert_edge_idx, const const T* v_int, T* p_vert_out, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, - const int nblks_v, const int cell_type, const bool lacc){ + const int nblks_v, const bool lacc){ // Wrap raw pointers in unmanaged Kokkos Views. typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; @@ -172,8 +172,8 @@ void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const UnmanagedConstT3D p_edge_in_view (p_edge_in, nproma, nlev, nblks_e); UnmanagedConstInt3D iidx_view (vert_edge_idx, nproma, nblks_v, 5); UnmanagedConstInt3D iblk_view (vert_edge_blk, nproma,nblks_v, 5); - UnmanagedConstT3D v_int_view (v_int, nproma,cell_type,nblks_v); UnmanagedT3D p_vert_out_view (p_vert_out, nproma,nlev,nblks_v); + UnmanagedConstT3D v_int_view (v_int, nproma, 6, nblks_v); for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { @@ -538,14 +538,14 @@ void edges2verts_scalar_lib<double> (const double* p_edge_in, const int* vert_ed const double* v_int, double* p_vert_out, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, - const int nblks_v, const int cell_type, const bool lacc); + const int nblks_v, const bool lacc); template void edges2verts_scalar_lib <float> (const float* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, const float* v_int, float* p_vert_out, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, - const int nblks_v, const int cell_type, const bool lacc); + const int nblks_v, const bool lacc); template diff --git a/src/interpolation/mo_lib_interpolation_scalar.hpp 
b/src/interpolation/mo_lib_interpolation_scalar.hpp index 16223ec..78f7df4 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.hpp +++ b/src/interpolation/mo_lib_interpolation_scalar.hpp @@ -34,7 +34,7 @@ void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const const T* v_int, T* p_vert_out, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, - const int nblks_v, const int cell_type, const bool lacc); + const int nblks_v, const bool lacc); template <typename T> void edges2cells_scalar_lib(const T* p_edge_in, const int* edge_idx, const int* edge_blk, @@ -71,4 +71,4 @@ void cell_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_ const T* avg_coeff, T* avg_psi_c, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const bool lacc); \ No newline at end of file + const int nlev, const int nblks_c, const bool lacc); diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp index f46c2b4..bad8277 100644 --- a/test/c/test_interpolation_scalar.cpp +++ b/test/c/test_interpolation_scalar.cpp @@ -175,7 +175,6 @@ TYPED_TEST(InterpolationScalarTypedTestFixture, Edges2Verts) { this->nlev, this->nblks_e, this->nblks_v, - this->cell_type, this->lacc); // Check the outputs only for blocks in the range -- GitLab From be37e502bacf1c9b542e431a0b25e8e25f297e79 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 4 Mar 2025 16:51:34 +0100 Subject: [PATCH 27/36] corrected dimensions for some of the arrays --- src/interpolation/mo_lib_interpolation_scalar.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index 
07403df..3ed58cb 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -170,10 +170,10 @@ void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; UnmanagedConstT3D p_edge_in_view (p_edge_in, nproma, nlev, nblks_e); - UnmanagedConstInt3D iidx_view (vert_edge_idx, nproma, nblks_v, 5); - UnmanagedConstInt3D iblk_view (vert_edge_blk, nproma,nblks_v, 5); - UnmanagedT3D p_vert_out_view (p_vert_out, nproma,nlev,nblks_v); + UnmanagedConstInt3D iidx_view (vert_edge_idx, nproma, nblks_v, 6); + UnmanagedConstInt3D iblk_view (vert_edge_blk, nproma,nblks_v, 6); UnmanagedConstT3D v_int_view (v_int, nproma, 6, nblks_v); + UnmanagedT3D p_vert_out_view (p_vert_out, nproma, nlev, nblks_v); for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { @@ -283,7 +283,7 @@ void cells2verts_scalar_lib( UnmanagedConstInt3D iblk_view(vert_cell_blk, nproma, nblks_v, 6); // vert_cell_blk_view // coefficients for interpolation, dim: (nproma,9-cell_type,nblks_v) - UnmanagedConstS3D coeff_int_view(coeff_int, nproma, 9, nblks_v); + UnmanagedConstS3D coeff_int_view(coeff_int, nproma, 6, nblks_v); // vertex based scalar output field, dim: (nproma,nlev,nblks_c) UnmanagedS3D p_vert_out_view(p_vert_out, nproma, nlev, nblks_c); @@ -342,7 +342,7 @@ void cells2verts_scalar_ri_lib( UnmanagedConstInt3D iblk_view(vert_cell_blk, nproma, nblks_v, 6); // vert_cell_blk_view // coefficients for interpolation, dim: (nproma,9-cell_type,nblks_v) - UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 9, nblks_v); + UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 6, nblks_v); // vertex based scalar output field, dim: (nproma,nlev,nblks_c) UnmanagedS3D p_vert_out_view(p_vert_out, nproma, nlev, nblks_c); @@ -398,7 +398,7 @@ void verts2cells_scalar_lib( // block indices of vertices of triangles, dim: (nproma,nblks_c, 3) 
UnmanagedConstInt3D iblk_view(cell_vertex_blk, nproma, nblks_c, 3); // cell_vertex_blk - // coefficients for interpolation, dim: (nproma,3-cell_type,nblks_c) + // coefficients for interpolation, dim: (nproma, 3, nblks_c) UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 3, nblks_c); // vertex based scalar output field, dim: (nproma,nlev,nblks_c) -- GitLab From e73d605403b5557f7203f32aaaf17b0bac8c6d63 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 4 Mar 2025 16:52:46 +0100 Subject: [PATCH 28/36] added few more tests for mo_interpolation_scalar --- test/c/test_interpolation_scalar.cpp | 419 +++++++++++---------------- 1 file changed, 175 insertions(+), 244 deletions(-) diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp index bad8277..2307cb8 100644 --- a/test/c/test_interpolation_scalar.cpp +++ b/test/c/test_interpolation_scalar.cpp @@ -72,27 +72,49 @@ class InterpolationScalarTypedTestFixture : public ::testing::Test, public inter public: // Arrays used for verts2edges - std::vector<T> p_vertex_in; // Dimensions: (nproma, nlev, nblks_v) - std::vector<int> edge_vertex_idx; // Dimensions: (nproma, nblks_e, 4) - std::vector<int> edge_vertex_blk; // Dimensions: (nproma, nblks_e, 4) - std::vector<T> coeff_int; // Dimensions: (nproma, 2, nblks_e) + std::vector<T> p_vertex_in; // Dimensions: (nproma, nlev, nblks_v) + std::vector<int> edge_vertex_idx; // Dimensions: (nproma, nblks_e, 4) + std::vector<int> edge_vertex_blk; // Dimensions: (nproma, nblks_e, 4) + std::vector<T> coeff_int_edges; // Dimensions: (nproma, 2, nblks_e) std::vector<T> p_edge_out; // Dimensions: (nproma, nlev, nblks_e) // Arrays used for edges2verts - std::vector<T> p_edge_in; // Dimensions: (nproma, nlev, nblks_e) - std::vector<int> edge_vert_idx; // Dimensions: (nproma, nblks_e, 6) - std::vector<int> edge_vert_blk; // Dimensions: (nproma, nblks_e, 6) - std::vector<T> v_int; // Dimensions: (nproma, 6-cell_type, nblks_v) + 
std::vector<T> p_edge_in; // Dimensions: (nproma, nlev, nblks_e) + std::vector<int> edge_vert_idx; // Dimensions: (nproma, nblks_e, 6) + std::vector<int> edge_vert_blk; // Dimensions: (nproma, nblks_e, 6) + std::vector<T> v_int; // Dimensions: (nproma, 6, nblks_v) std::vector<T> p_vert_out; // Dimensions: (nproma, nlev, nblks_v) - // + + // Arrays used for edges2cells + // std::vector<T> p_edge_in; // Dimensions: (nproma, nlev, nblks_e) + std::vector<int> edge_idx; // Dimensions: (nproma, nblks_c, 3) + std::vector<int> edge_blk; // Dimensions: (nproma, nblks_c, 3) + std::vector<T> coeff_int_cells; // Dimensions: (nproma, 3, nblks_c) + std::vector<T> p_cell_out; // Dimensions: (nproma, nlev, nblks_c) + + // Arrays used for verts2cells + std::vector<T> p_vert_in; // Dimensions: (nproma, nlev, nblks_v) + std::vector<int> cell_index_idx; // Dimensions: (nproma, nblks_c, 3) + std::vector<int> cell_index_blk; // Dimensions: (nproma, nblks_c, 3) + // std::vector<InType> coeff_int; // Dimensions: (nproma, 3, nblks_c) + // std::vector<T> p_cell_out; // Dimensions: (nproma, nlev, nblks_c) + + // Arrays used for avg_lib + std::vector<T> psi_c; // Dimensions: (nproma, nlev, nblks_c) + std::vector<int> cell_neighbor_idx; // Dimensions: (nproma, nblks_c, 3) + std::vector<int> cell_neighbor_blk; // Dimensions: (nproma, nblks_c, 3) + std::vector<T> avg_coeff; // Dimensions: (nproma, nlev, nblks_c) + std::vector<T> avg_psi_c; // Dimensions: (nproma, nlev, nblks_c) + const int cell_type = 6; + const int npromz_c = 32; InterpolationScalarTypedTestFixture() { // Allocate and initialize arrays needed for verts2edges p_vertex_in.resize(num_elements_3d<T>(nproma, nlev, nblks_v), static_cast<T>(1)); edge_vertex_idx.resize(num_elements_3d<int>(nproma, nblks_e, 4), 1); edge_vertex_blk.resize(num_elements_3d<int>(nproma, nblks_e, 4), 0); - coeff_int.resize(num_elements_3d<T>(nproma, 2, nblks_v), static_cast<T>(1)); + coeff_int_edges.resize(num_elements_3d<T>(nproma, 2, nblks_e), 
static_cast<T>(1)); p_edge_out.resize(num_elements_3d<T>(nproma, nlev, nblks_e), static_cast<T>(0)); @@ -103,6 +125,33 @@ public: v_int.resize(num_elements_3d<T>(nproma, 6, nblks_v), static_cast<T>(1)); p_vert_out.resize(num_elements_3d<T>(nproma, nlev, nblks_v), static_cast<T>(0)); + + // Allocate & Initialize arrays needed for edges2cells + // p_edge_in.resize(num_elements_3d<T>(nproma, nlev, nblks_e), static_cast<T>(1)); + edge_idx.resize(num_elements_3d<int>(nproma, nblks_c, 3), 1); + edge_blk.resize(num_elements_3d<int>(nproma, nblks_c, 3), 0); + coeff_int_cells.resize(num_elements_3d<T>(nproma, 3, nblks_c), static_cast<T>(1)); + + p_cell_out.resize(num_elements_3d<T>(nproma, nlev, nblks_c), static_cast<T>(0)); + + // Allocate and initialize arrays needed for verts2cells + p_vert_in.resize(num_elements_3d<T>(nproma, nlev, nblks_v), static_cast<T>(1)); + cell_index_idx.resize(num_elements_3d<int>(nproma, nblks_c, 3), 1); + cell_index_blk.resize(num_elements_3d<int>(nproma, nblks_c, 3), 0); + // coeff_int.resize(num_elements_3d<T>(nproma, cell_type, nblks_c), static_cast<T>(1)); + + // Allocate output arrays and initialize to zero. + // p_cell_out.resize(num_elements_3d<T>(nproma, nlev, nblks_c), static_cast<T>(0)); + + // Allocate and initialize arrays needed for avg_lib + psi_c.resize(num_elements_3d<T>(nproma, nlev, nblks_c), static_cast<T>(1)); + cell_neighbor_idx.resize(num_elements_3d<int>(nproma, nblks_c, 3), 1); + cell_neighbor_blk.resize(num_elements_3d<int>(nproma, nblks_c, 3), 0); + avg_coeff.resize(num_elements_3d<T>(nproma, nlev, nblks_c), static_cast<T>(1)); + + // Allocate output arrays and initialize to zero. 
+ avg_psi_c.resize(num_elements_3d<T>(nproma, nlev, nblks_c), static_cast<T>(0)); + } }; @@ -122,7 +171,7 @@ TYPED_TEST(InterpolationScalarTypedTestFixture, Verts2Edges) { this->p_vertex_in.data(), this->edge_vertex_idx.data(), this->edge_vertex_blk.data(), - this->coeff_int.data(), + this->coeff_int_edges.data(), this->p_edge_out.data(), this->i_startblk, this->i_endblk, @@ -194,11 +243,125 @@ TYPED_TEST(InterpolationScalarTypedTestFixture, Edges2Verts) { //////////////////////////////////////////////////////////////////////////////// // -// ! cells2edges +// ! edges2cells +// +//////////////////////////////////////////////////////////////////////////////// + +TYPED_TEST(InterpolationScalarTypedTestFixture, Edges2Cells) { + + edges2cells_scalar_lib<TypeParam>( + this->p_edge_in.data(), + this->edge_idx.data(), + this->edge_blk.data(), + this->coeff_int_cells.data(), + this->p_cell_out.data(), + this->i_startblk, + this->i_endblk, + this->i_startidx, + this->i_endidx, + this->slev, + this->elev, + this->nproma, + this->nlev, + this->nblks_e, + this->nblks_c, + this->lacc); + + // Check the outputs only for blocks in the range + // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev; level < this->elev; ++level) { + for (int i = this->i_startidx; i < this->i_endidx; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 3 stencil points, expect 3. 
+ EXPECT_NEAR(this->p_cell_out[idx], static_cast<TypeParam>(3), static_cast<TypeParam>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + } + } + } +} + +TYPED_TEST(InterpolationScalarTypedTestFixture, Verts2Cells) { + + verts2cells_scalar_lib<TypeParam>( + this->p_vert_in.data(), + this->cell_index_idx.data(), + this->cell_index_blk.data(), + this->coeff_int_cells.data(), + this->p_cell_out.data(), + this->nblks_c, + this->npromz_c, + this->slev, + this->elev, + this->nproma, + this->nlev, + this->nblks_v, + this->lacc); + + + // Check the outputs only for blocks in the range + // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev; level < this->elev; ++level) { + for (int i = this->i_startidx; i < this->i_endidx; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 3 stencil points, expect 3. + EXPECT_NEAR(this->p_cell_out[idx], static_cast<TypeParam>(3), static_cast<TypeParam>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +// +// ! 
cell_avg // //////////////////////////////////////////////////////////////////////////////// +TYPED_TEST(InterpolationScalarTypedTestFixture, AvgLib) { + // Call the function + cell_avg_lib<TypeParam>( + this->psi_c.data(), + this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), + this->avg_coeff.data(), + this->avg_psi_c.data(), + this->i_startblk, + this->i_endblk, + this->i_startidx, + this->i_endidx, + this->slev, + this->elev, + this->nproma, + this->nlev, + this->nblks_c, + this->lacc); + + + // Check the outputs only for blocks in the range + // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = this->slev; level < this->elev; ++level) { + for (int i = this->i_startidx; i < this->i_endidx; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 4 stencil points, expect 4. + EXPECT_NEAR(this->avg_psi_c[idx], static_cast<TypeParam>(4), static_cast<TypeParam>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +// +// ! cells2edges +// +//////////////////////////////////////////////////////////////////////////////// // Define a typed test fixture with one precision of data template <typename Types> @@ -287,85 +450,6 @@ TYPED_TEST(Cells2edgesScalarLibTestFixture, cells2edges) { } - - -//////////////////////////////////////////////////////////////////////////////// -// -// ! 
edges2cells -// -//////////////////////////////////////////////////////////////////////////////// - - -// Define a typed test fixture with one precision of data -template <typename Types> -class Edges2cellsScalarLibTestFixture : public testing::Test, public interp_dimensions{ -public: - using InType = typename Types::in_type; - using OutType = typename Types::out_type; - - // Arrays stored in std::vector. - std::vector<InType> p_edge_in; // Dimensions: (nproma, nlev, nblks_e) - std::vector<int> edge_idx; // Dimensions: (nproma, nblks_c, 3) - std::vector<int> edge_blk; // Dimensions: (nproma, nblks_c, 3) - std::vector<OutType> coeff_int; // Dimensions: (nproma, 3-cell_type, nblks_c) - std::vector<OutType> p_cell_out; // Dimensions: (nproma, nlev, nblks_c) - - const int cell_type = 3; - - Edges2cellsScalarLibTestFixture() { - // Allocate and initialize inputs. - p_edge_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1)); - edge_idx.resize(num_elements_3d<int>(nproma, nblks_c, cell_type), 1); - edge_blk.resize(num_elements_3d<int>(nproma, nblks_c, cell_type), 0); - coeff_int.resize(num_elements_3d<InType>(nproma, cell_type, nblks_c), static_cast<OutType>(1)); - - // Allocate output arrays and initialize to zero. 
- p_cell_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_c), static_cast<OutType>(0)); - } -}; - -// Add test suite -TYPED_TEST_SUITE(Edges2cellsScalarLibTestFixture, OneTypes); - -// Add test -TYPED_TEST(Edges2cellsScalarLibTestFixture, cells2edges) { - using InType = typename TestFixture::InType; - using OutType = typename TestFixture::OutType; - - // Call the function - edges2cells_scalar_lib<InType>( - this->p_edge_in.data(), - this->edge_idx.data(), - this->edge_blk.data(), - this->coeff_int.data(), - this->p_cell_out.data(), - this->i_startblk, - this->i_endblk, - this->i_startidx, - this->i_endidx, - this->slev, - this->elev, - this->nproma, - this->nlev, - this->nblks_e, - this->nblks_c, - this->lacc); - - // Check the outputs only for blocks in the range - // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } - for (int block = this->i_startblk; block <= this->i_endblk; ++block) { - for (int level = this->slev; level < this->elev; ++level) { - for (int i = this->i_startidx; i < this->i_endidx; ++i) { - // Compute the linear index for a 3D array in column-major order: - size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 3 stencil points, expect 3. - EXPECT_NEAR(this->p_cell_out[idx], static_cast<OutType>(3), static_cast<OutType>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " << i; - } - } - } -} - //////////////////////////////////////////////////////////////////////////////// // // ! cells2verts @@ -521,156 +605,3 @@ TYPED_TEST(Cells2vertsriScalarLibTestFixture, cells2edges) { } } } - - -//////////////////////////////////////////////////////////////////////////////// -// -// ! 
verts2cells -// -//////////////////////////////////////////////////////////////////////////////// - - -// Define a typed test fixture with one precision of data -template <typename Types> -class Verts2cellsScalarLibTestFixture : public testing::Test, public interp_dimensions{ -public: - using InType = typename Types::in_type; - using OutType = typename Types::out_type; - - // Arrays stored in std::vector. - std::vector<InType> p_vert_in; // Dimensions: (nproma, nlev, nblks_v) - std::vector<int> cell_index_idx; // Dimensions: (nproma, nblks_c, 3) - std::vector<int> cell_index_blk; // Dimensions: (nproma, nblks_c, 3) - std::vector<InType> coeff_int; // Dimensions: (nproma, 3-cell_type, nblks_c) - std::vector<OutType> p_cell_out; // Dimensions: (nproma, nlev, nblks_c) - - const int cell_type = 3; - const int npromz_c = 32; - - Verts2cellsScalarLibTestFixture() { - // Allocate and initialize inputs. - p_vert_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_v), static_cast<InType>(1)); - cell_index_idx.resize(num_elements_3d<int>(nproma, nblks_c, 3), 1); - cell_index_blk.resize(num_elements_3d<int>(nproma, nblks_c, 3), 0); - coeff_int.resize(num_elements_3d<InType>(nproma, cell_type, nblks_c), static_cast<InType>(1)); - - // Allocate output arrays and initialize to zero. 
- p_cell_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_c), static_cast<OutType>(0)); - } -}; - -// Add test suite -TYPED_TEST_SUITE(Verts2cellsScalarLibTestFixture, OneTypes); - -// Add test -TYPED_TEST(Verts2cellsScalarLibTestFixture, cells2edges) { - using InType = typename TestFixture::InType; - using OutType = typename TestFixture::OutType; - - // Call the function - verts2cells_scalar_lib<InType>( - this->p_vert_in.data(), - this->cell_index_idx.data(), - this->cell_index_blk.data(), - this->coeff_int.data(), - this->p_cell_out.data(), - this->nblks_c, - this->npromz_c, - this->slev, - this->elev, - this->nproma, - this->nlev, - this->nblks_v, - this->lacc); - - - // Check the outputs only for blocks in the range - // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } - for (int block = this->i_startblk; block <= this->i_endblk; ++block) { - for (int level = this->slev; level < this->elev; ++level) { - for (int i = this->i_startidx; i < this->i_endidx; ++i) { - // Compute the linear index for a 3D array in column-major order: - size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 3 stencil points, expect 3. - EXPECT_NEAR(this->p_cell_out[idx], static_cast<OutType>(3), static_cast<OutType>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " << i; - } - } - } -} - - -//////////////////////////////////////////////////////////////////////////////// -// -// ! cell_avg -// -//////////////////////////////////////////////////////////////////////////////// - - -// Define a typed test fixture with one precision of data -template <typename Types> -class CellAvgScalarLibTestFixture : public testing::Test, public interp_dimensions{ -public: - using InType = typename Types::in_type; - using OutType = typename Types::out_type; - - // Arrays stored in std::vector. 
- std::vector<InType> psi_c; // Dimensions: (nproma, nlev, nblks_c) - std::vector<int> cell_neighbor_idx; // Dimensions: (nproma, nblks_c, 3) - std::vector<int> cell_neighbor_blk; // Dimensions: (nproma, nblks_c, 3) - std::vector<InType> avg_coeff; // Dimensions: (nproma, nlev, nblks_c) - std::vector<OutType> avg_psi_c; // Dimensions: (nproma, nlev, nblks_c) - - CellAvgScalarLibTestFixture() { - // Allocate and initialize inputs. - psi_c.resize(num_elements_3d<InType>(nproma, nlev, nblks_c), static_cast<InType>(1)); - cell_neighbor_idx.resize(num_elements_3d<int>(nproma, nblks_c, 3), 1); - cell_neighbor_blk.resize(num_elements_3d<int>(nproma, nblks_c, 3), 0); - avg_coeff.resize(num_elements_3d<InType>(nproma, nlev, nblks_c), static_cast<InType>(1)); - - // Allocate output arrays and initialize to zero. - avg_psi_c.resize(num_elements_3d<OutType>(nproma, nlev, nblks_c), static_cast<OutType>(0)); - } -}; - -// Add test suite -TYPED_TEST_SUITE(CellAvgScalarLibTestFixture, OneTypes); - -// Add test -TYPED_TEST(CellAvgScalarLibTestFixture, cells2edges) { - using InType = typename TestFixture::InType; - using OutType = typename TestFixture::OutType; - - // Call the function - cell_avg_lib<InType>( - this->psi_c.data(), - this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), - this->avg_coeff.data(), - this->avg_psi_c.data(), - this->i_startblk, - this->i_endblk, - this->i_startidx, - this->i_endidx, - this->slev, - this->elev, - this->nproma, - this->nlev, - this->nblks_c, - this->lacc); - - - // Check the outputs only for blocks in the range - // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } - for (int block = this->i_startblk; block <= this->i_endblk; ++block) { - for (int level = this->slev; level < this->elev; ++level) { - for (int i = this->i_startidx; i < this->i_endidx; ++i) { - // Compute the linear index for a 3D array in column-major order: - size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; - // 
Since every contribution is 1 and there are 4 stencil points, expect 4. - EXPECT_NEAR(this->avg_psi_c[idx], static_cast<OutType>(4), static_cast<OutType>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " << i; - } - } - } -} -- GitLab From c6fc4504549773608f791869155da741a2bcb7c0 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Wed, 5 Mar 2025 13:27:04 +0100 Subject: [PATCH 29/36] added the use of loop_exchange in cells2verts_ri_scalar_lib --- src/interpolation/mo_lib_interpolation_scalar.cpp | 8 ++++++++ test/c/CMakeLists.txt | 4 ++++ test/c/test_interpolation_scalar.cpp | 4 ++++ 3 files changed, 16 insertions(+) diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index 3ed58cb..6372870 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -345,7 +345,11 @@ void cells2verts_scalar_ri_lib( UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 6, nblks_v); // vertex based scalar output field, dim: (nproma,nlev,nblks_c) +#ifdef __LOOP_EXCHANGE UnmanagedS3D p_vert_out_view(p_vert_out, nproma, nlev, nblks_c); +#else + UnmanagedS3D p_vert_out_view(p_vert_out, nlev, nproma, nblks_c); +#endif int i_startidx, i_endidx; @@ -360,7 +364,11 @@ void cells2verts_scalar_ri_lib( Kokkos::parallel_for("cells2verts_scalar_ri_lib", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jv) { +#ifdef __LOOP_EXCHANGE p_vert_out_view(jv, jk, jb) = +#else + p_vert_out_view(jk, jv, jb) = +#endif coeff_int_view(jv, 0, jb)*p_cell_in_view(iidx_view(jv, jb, 0), jk, iblk_view(jv, jb, 0)) + coeff_int_view(jv, 1, jb)*p_cell_in_view(iidx_view(jv, jb, 1), jk, iblk_view(jv, jb, 1)) + coeff_int_view(jv, 2, jb)*p_cell_in_view(iidx_view(jv, jb, 2), jk, iblk_view(jv, jb, 2)) + diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index 16371d0..c9320cb 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -21,6 
+21,10 @@ FetchContent_MakeAvailable(googletest) # Find Kokkos (or use your existing Kokkos installation) # find_package(Kokkos REQUIRED) +if(IM_ENABLE_LOOP_EXCHANGE) + target_compile_definitions(iconmath-interpolation PRIVATE __LOOP_EXCHANGE) +endif() + set(SOURCES main.cpp test_tdma_solver.cpp diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp index 2307cb8..2e0be9c 100644 --- a/test/c/test_interpolation_scalar.cpp +++ b/test/c/test_interpolation_scalar.cpp @@ -597,7 +597,11 @@ TYPED_TEST(Cells2vertsriScalarLibTestFixture, cells2edges) { for (int level = this->slev; level < this->elev; ++level) { for (int i = this->i_startidx; i < this->i_endidx; ++i) { // Compute the linear index for a 3D array in column-major order: +#ifdef __LOOP_EXCHANGE size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; +#else + size_t idx = level + i * this->nlev + block * this->nproma * this->nlev; +#endif // Since every contribution is 1 and there are 6 stencil points, expect 6. 
EXPECT_NEAR(this->p_vert_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) << "Failure at block " << block << ", level " << level << ", index " << i; -- GitLab From b4c86e81158123f23645c8f949b1252bd13fc0d8 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Wed, 5 Mar 2025 13:28:28 +0100 Subject: [PATCH 30/36] modified the routine cells2verts_scalar_lib --- src/interpolation/interpolation_bindings.cpp | 6 +++--- src/interpolation/mo_lib_interpolation_scalar.cpp | 8 ++++---- test/c/test_interpolation_scalar.cpp | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp index 0535091..d113952 100644 --- a/src/interpolation/interpolation_bindings.cpp +++ b/src/interpolation/interpolation_bindings.cpp @@ -209,14 +209,14 @@ void cells2verts_scalar_lib_dp( } void cells2verts_scalar_lib_dp2sp( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async) { - cells2verts_scalar_lib<double,float>( + cells2verts_scalar_lib<float,double>( p_cell_in, vert_cell_idx, vert_cell_blk, coeff_int, p_vert_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in, diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index 6372870..26bd876 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -323,7 +323,7 @@ void cells2verts_scalar_ri_lib( const T* p_cell_in, const int* 
vert_cell_idx, const int* vert_cell_blk, const T* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async) { @@ -585,9 +585,9 @@ void cells2verts_scalar_lib<double,double>( const bool lacc, const bool acc_async); template -void cells2verts_scalar_lib<double,float>( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, +void cells2verts_scalar_lib<float,double>( + const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, + const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp index 2e0be9c..bd179c4 100644 --- a/test/c/test_interpolation_scalar.cpp +++ b/test/c/test_interpolation_scalar.cpp @@ -486,7 +486,7 @@ public: }; // Add test suite -TYPED_TEST_SUITE(Cells2vertsScalarLibTestFixture, MixedTypes); +TYPED_TEST_SUITE(Cells2vertsScalarLibTestFixture, MixedTypesSP2DP); // Add test TYPED_TEST(Cells2vertsScalarLibTestFixture, cells2edges) { -- GitLab From aab9f0354f42d178893683cf4576af7008aac0a8 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Wed, 5 Mar 2025 15:38:51 +0100 Subject: [PATCH 31/36] added the last set of tests --- test/c/test_interpolation_scalar.cpp | 134 ++++++++++----------------- 1 file changed, 51 insertions(+), 83 deletions(-) diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp index bd179c4..6338a36 100644 --- 
a/test/c/test_interpolation_scalar.cpp +++ b/test/c/test_interpolation_scalar.cpp @@ -41,10 +41,6 @@ typedef ::testing::Types< MixedPrecision<double, double>, MixedPrecision<float, double>, MixedPrecision<float, float> > MixedTypesSP2DP; -// Define the list of type pairs we want to test. -typedef ::testing::Types< MixedPrecision<double, double>, - MixedPrecision<float, float > > OneTypes; - // Shared dimensions for all routines and classes class interp_dimensions { @@ -357,67 +353,76 @@ TYPED_TEST(InterpolationScalarTypedTestFixture, AvgLib) { } } -//////////////////////////////////////////////////////////////////////////////// -// -// ! cells2edges -// -//////////////////////////////////////////////////////////////////////////////// - -// Define a typed test fixture with one precision of data -template <typename Types> -class Cells2edgesScalarLibTestFixture : public testing::Test, public interp_dimensions{ +template <typename TypePair> +class InterpolationScalarMixedTestFixture : public ::testing::Test, public interp_dimensions { public: - using InType = typename Types::in_type; - using OutType = typename Types::out_type; + using InType = typename TypePair::in_type; + using OutType = typename TypePair::out_type; - // Arrays stored in std::vector. 
- std::vector<InType> p_cell_in; // Dimensions: (nproma, nlev, nblks_c) - std::vector<int> edge_cell_idx; // Dimensions: (nproma, nblks_e, 2) - std::vector<int> edge_cell_blk; // Dimensions: (nproma, nblks_e, 2) - std::vector<OutType> coeff_int; // Dimensions: (nproma, 2, nblks_e) - std::vector<OutType> p_edge_out; // Dimensions: (nproma, nlev, nblks_e) + // Arrays used for cells2edges + std::vector<InType> p_cell_in; // Dimensions: (nproma, nlev, nblks_c) + std::vector<int> edge_cell_idx; // Dimensions: (nproma, nblks_e, 2) + std::vector<int> edge_cell_blk; // Dimensions: (nproma, nblks_e, 2) + std::vector<OutType> coeff_int_edges; // Dimensions: (nproma, 2, nblks_e) + std::vector<OutType> p_edge_out; // Dimensions: (nproma, nlev, nblks_e) - // TODO review these values + // Further parameters for cells2edges const int patch_id = 0; const bool l_limited_area = false; const bool lfill_latbc = false; - std::vector<int> i_startblk_in; // Dimensions: (2) - std::vector<int> i_endblk_in; // Dimensions: (2) - std::vector<int> i_startidx_in; // Dimensions: (2) - std::vector<int> i_endidx_in; // Dimensions: (2) - - Cells2edgesScalarLibTestFixture() { - // Allocate and initialize inputs. 
+ std::vector<int> i_startblk_in; // Dimensions: (2) + std::vector<int> i_endblk_in; // Dimensions: (2) + std::vector<int> i_startidx_in; // Dimensions: (2) + std::vector<int> i_endidx_in; // Dimensions: (2) + + // Arrays used for cells2verts + std::vector<int> vert_cell_idx; // Dimensions: (nproma, nblks_v, 6) + std::vector<int> vert_cell_blk; // Dimensions: (nproma, nblks_v, 6) + std::vector<OutType> coeff_int_verts; // Dimensions: (nproma, 6, nblks_v) + std::vector<OutType> p_vert_out; // Dimensions: (nproma, nlev, nblks_v) + + InterpolationScalarMixedTestFixture() { + // Allocate and initialize arrays needed for cells2edges p_cell_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_c), static_cast<InType>(1)); edge_cell_idx.resize(num_elements_3d<int>(nproma, nblks_e, 2), 1); edge_cell_blk.resize(num_elements_3d<int>(nproma, nblks_e, 2), 0); - coeff_int.resize(num_elements_3d<InType>(nproma, 2, nblks_e), static_cast<OutType>(1)); + coeff_int_edges.resize(num_elements_3d<InType>(nproma, 2, nblks_e), static_cast<OutType>(1)); - // Allocate output arrays and initialize to zero. 
p_edge_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_e), static_cast<OutType>(0)); - // Allocate neighbour indexes + // Allocate neighbour indexes for cells2edges i_startblk_in.resize(2, i_startblk); i_endblk_in.resize(2, i_endblk); i_startidx_in.resize(2, i_startidx); i_endidx_in.resize(2, i_endidx); + + // Allocate & Initialize arrays needed for cells2verts + vert_cell_idx.resize(num_elements_3d<int>(nproma, nblks_v, 6), 1); + vert_cell_blk.resize(num_elements_3d<int>(nproma, nblks_v, 6), 0); + coeff_int_verts.resize(num_elements_3d<InType>(nproma, 6, nblks_v), static_cast<OutType>(1)); + + p_vert_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); } }; -// Add test suite -TYPED_TEST_SUITE(Cells2edgesScalarLibTestFixture, MixedTypesSP2DP); +TYPED_TEST_SUITE(InterpolationScalarMixedTestFixture, MixedTypesSP2DP); -// Add test -TYPED_TEST(Cells2edgesScalarLibTestFixture, cells2edges) { +//////////////////////////////////////////////////////////////////////////////// +// +// ! cells2edges +// +//////////////////////////////////////////////////////////////////////////////// + +TYPED_TEST(InterpolationScalarMixedTestFixture, cells2edges) { using InType = typename TestFixture::InType; - using OutType = typename TestFixture::OutType; + using OutType = typename TestFixture::OutType; // Call the function cells2edges_scalar_lib<InType,OutType>( this->p_cell_in.data(), this->edge_cell_idx.data(), this->edge_cell_blk.data(), - this->coeff_int.data(), + this->coeff_int_edges.data(), this->p_edge_out.data(), this->i_startblk_in.data(), this->i_endblk_in.data(), @@ -449,56 +454,21 @@ TYPED_TEST(Cells2edgesScalarLibTestFixture, cells2edges) { } } - //////////////////////////////////////////////////////////////////////////////// // // ! 
cells2verts // //////////////////////////////////////////////////////////////////////////////// - -// Define a typed test fixture with one precision of data -template <typename Types> -class Cells2vertsScalarLibTestFixture : public testing::Test, public interp_dimensions{ -public: - using InType = typename Types::in_type; - using OutType = typename Types::out_type; - - // Arrays stored in std::vector. - std::vector<InType> p_cell_in; // Dimensions: (nproma, nlev, nblks_c) - std::vector<int> vert_cell_idx; // Dimensions: (nproma, nblks_v, 6) - std::vector<int> vert_cell_blk; // Dimensions: (nproma, nblks_v, 6) - std::vector<OutType> coeff_int; // Dimensions: (nproma, 9-cell_type, nblks_v) - std::vector<OutType> p_vert_out; // Dimensions: (nproma, nlev, nblks_v) - - const int cell_type = 9; - - Cells2vertsScalarLibTestFixture() { - // Allocate and initialize inputs. - p_cell_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_c), static_cast<InType>(1)); - vert_cell_idx.resize(num_elements_3d<int>(nproma, nblks_v, 6), 1); - vert_cell_blk.resize(num_elements_3d<int>(nproma, nblks_v, 6), 0); - coeff_int.resize(num_elements_3d<InType>(nproma, cell_type, nblks_v), static_cast<OutType>(1)); - - // Allocate output arrays and initialize to zero. 
- p_vert_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); - } -}; - -// Add test suite -TYPED_TEST_SUITE(Cells2vertsScalarLibTestFixture, MixedTypesSP2DP); - -// Add test -TYPED_TEST(Cells2vertsScalarLibTestFixture, cells2edges) { +TYPED_TEST(InterpolationScalarMixedTestFixture, cells2verts) { using InType = typename TestFixture::InType; - using OutType = typename TestFixture::OutType; + using OutType = typename TestFixture::OutType; - // Call the function cells2verts_scalar_lib<InType,OutType>( this->p_cell_in.data(), this->vert_cell_idx.data(), this->vert_cell_blk.data(), - this->coeff_int.data(), + this->coeff_int_verts.data(), this->p_vert_out.data(), this->i_startblk, this->i_endblk, @@ -534,8 +504,8 @@ TYPED_TEST(Cells2vertsScalarLibTestFixture, cells2edges) { // //////////////////////////////////////////////////////////////////////////////// - -// Define a typed test fixture with one precision of data +// The test for cells2verts_ri is similar to cells2verts, but is done separately here +// because a different template instantiation is needed for the function call template <typename Types> class Cells2vertsriScalarLibTestFixture : public testing::Test, public interp_dimensions{ public: @@ -546,17 +516,15 @@ public: std::vector<InType> p_cell_in; // Dimensions: (nproma, nlev, nblks_c) std::vector<int> vert_cell_idx; // Dimensions: (nproma, nblks_v, 6) std::vector<int> vert_cell_blk; // Dimensions: (nproma, nblks_v, 6) - std::vector<InType> coeff_int; // Dimensions: (nproma, 9-cell_type, nblks_v) + std::vector<InType> coeff_int; // Dimensions: (nproma, 6, nblks_v) std::vector<OutType> p_vert_out; // Dimensions: (nproma, nlev, nblks_v) - const int cell_type = 9; - Cells2vertsriScalarLibTestFixture() { // Allocate and initialize inputs. 
p_cell_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_c), static_cast<InType>(1)); vert_cell_idx.resize(num_elements_3d<int>(nproma, nblks_v, 6), 1); vert_cell_blk.resize(num_elements_3d<int>(nproma, nblks_v, 6), 0); - coeff_int.resize(num_elements_3d<InType>(nproma, cell_type, nblks_v), static_cast<InType>(1)); + coeff_int.resize(num_elements_3d<InType>(nproma, 6, nblks_v), static_cast<InType>(1)); // Allocate output arrays and initialize to zero. p_vert_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); @@ -567,7 +535,7 @@ public: TYPED_TEST_SUITE(Cells2vertsriScalarLibTestFixture, MixedTypes); // Add test -TYPED_TEST(Cells2vertsriScalarLibTestFixture, cells2edges) { +TYPED_TEST(Cells2vertsriScalarLibTestFixture, cells2verts_ri) { using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; -- GitLab From 6e479502ea0799f4b9546c105ebc78e2028dc67d Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Wed, 5 Mar 2025 15:52:34 +0100 Subject: [PATCH 32/36] removed whitespaces --- src/interpolation/interpolation_bindings.cpp | 178 +++++++-------- src/interpolation/interpolation_bindings.h | 96 ++++---- .../mo_lib_interpolation_scalar.cpp | 214 +++++++++--------- .../mo_lib_interpolation_scalar.hpp | 38 ++-- .../mo_lib_interpolation_vector.cpp | 4 +- test/c/test_interpolation_scalar.cpp | 20 +- 6 files changed, 275 insertions(+), 275 deletions(-) diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp index d113952..fc39ccd 100644 --- a/src/interpolation/interpolation_bindings.cpp +++ b/src/interpolation/interpolation_bindings.cpp @@ -46,7 +46,7 @@ void edges2cells_vector_lib_sp( int slev, int elev, int nproma, int nlev, int nblks_e, int nblks_c){ - + edges2cells_vector_lib<float>(p_vn_in, p_vt_in, cell_edge_idx, cell_edge_blk, e_bln_c_u, e_bln_c_v, @@ -63,15 +63,15 @@ void verts2edges_scalar_lib_dp(const double* p_vertex_in, const 
int* edge_vertex_idx, const int* edge_vertex_blk, const double* coeff_int, double* p_edge_out, - const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, + const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc){ verts2edges_scalar_lib<double>(p_vertex_in, edge_vertex_idx, edge_vertex_blk, coeff_int, p_edge_out, - i_startblk, i_endblk, - i_startidx_in, i_endidx_in, slev, elev, + i_startblk, i_endblk, + i_startidx_in, i_endidx_in, slev, elev, nproma, nlev, nblks_v, nblks_e, lacc); } @@ -80,76 +80,76 @@ void verts2edges_scalar_lib_sp(const float* p_vertex_in, const int* edge_vertex_idx, const int* edge_vertex_blk, const float* coeff_int, float* p_edge_out, - const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, + const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc){ verts2edges_scalar_lib<float>(p_vertex_in, edge_vertex_idx, edge_vertex_blk, coeff_int, p_edge_out, - i_startblk, i_endblk, - i_startidx_in, i_endidx_in, slev, elev, + i_startblk, i_endblk, + i_startidx_in, i_endidx_in, slev, elev, nproma, nlev, nblks_v, nblks_e, lacc); } -void cells2edges_scalar_lib_dp(const double* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const double* coeff_int, double* p_edge_out, const int* i_startblk_in, +void cells2edges_scalar_lib_dp(const double* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const double* coeff_int, double* p_edge_out, const int* i_startblk_in, const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, const int slev, const int elev, const int nproma, const int 
nlev, - const int nblk_c, const int nblks_e, const int patch_id, + const int nblk_c, const int nblks_e, const int patch_id, const bool l_limited_area, const bool lfill_latbc, const bool lacc){ - - cells2edges_scalar_lib<double, double>(p_cell_in,edge_cell_idx,edge_cell_blk,coeff_int,p_edge_out,i_startblk_in, + + cells2edges_scalar_lib<double, double>(p_cell_in,edge_cell_idx,edge_cell_blk,coeff_int,p_edge_out,i_startblk_in, i_endblk_in,i_startidx_in,i_endidx_in,slev,elev,nproma,nlev, nblk_c,nblks_e,patch_id,l_limited_area,lfill_latbc,lacc); } -void cells2edges_scalar_lib_sp(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const float* coeff_int, float* p_edge_out, const int* i_startblk_in, +void cells2edges_scalar_lib_sp(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const float* coeff_int, float* p_edge_out, const int* i_startblk_in, const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, + const int nblk_c, const int nblks_e, const int patch_id, const bool l_limited_area, const bool lfill_latbc, const bool lacc){ - - cells2edges_scalar_lib<float, float>(p_cell_in,edge_cell_idx,edge_cell_blk,coeff_int,p_edge_out,i_startblk_in, + + cells2edges_scalar_lib<float, float>(p_cell_in,edge_cell_idx,edge_cell_blk,coeff_int,p_edge_out,i_startblk_in, i_endblk_in,i_startidx_in,i_endidx_in,slev,elev,nproma,nlev, nblk_c,nblks_e,patch_id,l_limited_area,lfill_latbc,lacc); } -void cells2edges_scalar_lib_sp2dp(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const double* coeff_int, double* p_edge_out, const int* i_startblk_in, +void cells2edges_scalar_lib_sp2dp(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const double* coeff_int, double* p_edge_out, const int* i_startblk_in, const int* i_endblk_in, const int* 
i_startidx_in, const int* i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, + const int nblk_c, const int nblks_e, const int patch_id, const bool l_limited_area, const bool lfill_latbc, const bool lacc){ - cells2edges_scalar_lib<float, double>(p_cell_in,edge_cell_idx,edge_cell_blk,coeff_int,p_edge_out,i_startblk_in, + cells2edges_scalar_lib<float, double>(p_cell_in,edge_cell_idx,edge_cell_blk,coeff_int,p_edge_out,i_startblk_in, i_endblk_in,i_startidx_in,i_endidx_in,slev,elev,nproma,nlev, nblk_c,nblks_e,patch_id,l_limited_area,lfill_latbc,lacc); } -void edges2verts_scalar_lib_dp (const double* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, - const double* v_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, +void edges2verts_scalar_lib_dp (const double* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, + const double* v_int, double* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, const int nblks_v, const bool lacc){ - edges2verts_scalar_lib<double>(p_edge_in, vert_edge_idx, vert_edge_blk, v_int,p_vert_out,i_startblk, + edges2verts_scalar_lib<double>(p_edge_in, vert_edge_idx, vert_edge_blk, v_int,p_vert_out,i_startblk, i_endblk,i_startidx_in,i_endidx_in, slev, elev, nproma, nlev,nblks_e, nblks_v, lacc); } -void edges2verts_scalar_lib_sp(const float* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, - const float* v_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, +void edges2verts_scalar_lib_sp(const float* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, + const float* v_int, float* p_vert_out, const int i_startblk, const 
int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, const int nblks_v, const bool lacc){ - edges2verts_scalar_lib<float>(p_edge_in, vert_edge_idx, vert_edge_blk, v_int,p_vert_out,i_startblk, + edges2verts_scalar_lib<float>(p_edge_in, vert_edge_idx, vert_edge_blk, v_int,p_vert_out,i_startblk, i_endblk,i_startidx_in,i_endidx_in, slev, elev, nproma, nlev,nblks_e, nblks_v, lacc); } @@ -157,16 +157,16 @@ void edges2verts_scalar_lib_sp(const float* p_edge_in, const int* vert_edge_idx, void edges2cells_scalar_lib_dp( const double* p_edge_in, const int* edge_idx, const int* edge_blk, const double* coeff_int, double* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, const int nblks_c, const bool lacc) { edges2cells_scalar_lib<double>( p_edge_in, edge_idx, edge_blk, coeff_int, p_cell_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, + i_startidx_in, i_endidx_in, + slev, elev, nproma, nlev, nblks_e, nblks_c, lacc); } @@ -174,8 +174,8 @@ void edges2cells_scalar_lib_dp( void edges2cells_scalar_lib_sp( const float* p_edge_in, const int* edge_idx, const int* edge_blk, const float* coeff_int, float* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, const int nblks_c, const bool lacc) { @@ -183,8 +183,8 @@ void edges2cells_scalar_lib_sp( edges2cells_scalar_lib<float>( p_edge_in, edge_idx, edge_blk, coeff_int, p_cell_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, 
elev, nproma, + i_startidx_in, i_endidx_in, + slev, elev, nproma, nlev, nblks_e, nblks_c, lacc); } @@ -194,16 +194,16 @@ void edges2cells_scalar_lib_sp( void cells2verts_scalar_lib_dp( const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async) { cells2verts_scalar_lib<double,double>( p_cell_in, vert_cell_idx, vert_cell_blk, coeff_int, p_vert_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, + i_startidx_in, i_endidx_in, + slev, elev, nproma, nlev, nblks_c, nblks_v, lacc, acc_async); } @@ -211,16 +211,16 @@ void cells2verts_scalar_lib_dp( void cells2verts_scalar_lib_dp2sp( const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async) { cells2verts_scalar_lib<float,double>( p_cell_in, vert_cell_idx, vert_cell_blk, coeff_int, p_vert_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, + i_startidx_in, i_endidx_in, + slev, elev, nproma, nlev, nblks_c, nblks_v, lacc, acc_async); } @@ -228,16 +228,16 @@ void cells2verts_scalar_lib_dp2sp( void cells2verts_scalar_lib_sp( const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - 
const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async) { cells2verts_scalar_lib<float,float>( p_cell_in, vert_cell_idx, vert_cell_blk, coeff_int, p_vert_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, + i_startidx_in, i_endidx_in, + slev, elev, nproma, nlev, nblks_c, nblks_v, lacc, acc_async); } @@ -247,16 +247,16 @@ void cells2verts_scalar_lib_sp( void cells2verts_scalar_ri_lib_dp( const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async) { cells2verts_scalar_ri_lib<double,double>( p_cell_in, vert_cell_idx, vert_cell_blk, coeff_int, p_vert_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, + i_startidx_in, i_endidx_in, + slev, elev, nproma, nlev, nblks_c, nblks_v, lacc, acc_async); } @@ -264,16 +264,16 @@ void cells2verts_scalar_ri_lib_dp( void cells2verts_scalar_ri_lib_dp2sp( const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const double* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async) { cells2verts_scalar_ri_lib<double,float>( 
p_cell_in, vert_cell_idx, vert_cell_blk, coeff_int, p_vert_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, + i_startidx_in, i_endidx_in, + slev, elev, nproma, nlev, nblks_c, nblks_v, lacc, acc_async); } @@ -281,16 +281,16 @@ void cells2verts_scalar_ri_lib_dp2sp( void cells2verts_scalar_ri_lib_sp( const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async) { cells2verts_scalar_ri_lib<float,float>( p_cell_in, vert_cell_idx, vert_cell_blk, coeff_int, p_vert_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, + i_startidx_in, i_endidx_in, + slev, elev, nproma, nlev, nblks_c, nblks_v, lacc, acc_async); } @@ -298,64 +298,64 @@ void cells2verts_scalar_ri_lib_sp( ///////////////////////////////////////////// void verts2cells_scalar_lib_dp( - const double* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const double* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, const double* coeff_int, double* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_v, const bool lacc) { verts2cells_scalar_lib<double>( - p_vert_in, cell_index_idx, cell_vertex_blk, + p_vert_in, cell_index_idx, cell_vertex_blk, coeff_int, p_cell_out, nblks_c, npromz_c, - slev, elev, nproma, nlev, + slev, elev, nproma, nlev, nblks_v, lacc); } void verts2cells_scalar_lib_sp( - const float* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const float* p_vert_in, 
const int* cell_index_idx, const int* cell_vertex_blk, const float* coeff_int, float* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_v, const bool lacc) { verts2cells_scalar_lib<float>( - p_vert_in, cell_index_idx, cell_vertex_blk, + p_vert_in, cell_index_idx, cell_vertex_blk, coeff_int, p_cell_out, nblks_c, npromz_c, - slev, elev, nproma, nlev, + slev, elev, nproma, nlev, nblks_v, lacc); } ///////////////////////////////////////////// void cell_avg_lib_dp( - const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const double* avg_coeff, double* avg_psi_c, const int i_startblk, const int i_endblk, + const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const double* avg_coeff, double* avg_psi_c, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const bool lacc) { cell_avg_lib<double>( - psi_c, cell_neighbor_idx, cell_neighbor_blk, - avg_coeff, avg_psi_c, i_startblk, i_endblk, + psi_c, cell_neighbor_idx, cell_neighbor_blk, + avg_coeff, avg_psi_c, i_startblk, i_endblk, i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_c, + slev, elev, nproma, + nlev, nblks_c, lacc); } - + void cell_avg_lib_sp( - const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const float* avg_coeff, float* avg_psi_c, const int i_startblk, const int i_endblk, + const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const float* avg_coeff, float* avg_psi_c, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const 
int nlev, const int nblks_c, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const bool lacc) { cell_avg_lib<float>( - psi_c, cell_neighbor_idx, cell_neighbor_blk, - avg_coeff, avg_psi_c, i_startblk, i_endblk, + psi_c, cell_neighbor_idx, cell_neighbor_blk, + avg_coeff, avg_psi_c, i_startblk, i_endblk, i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_c, + slev, elev, nproma, + nlev, nblks_c, lacc); } - + diff --git a/src/interpolation/interpolation_bindings.h b/src/interpolation/interpolation_bindings.h index 14c1f34..7c6b125 100644 --- a/src/interpolation/interpolation_bindings.h +++ b/src/interpolation/interpolation_bindings.h @@ -40,65 +40,65 @@ extern "C"{ const int* edge_vertex_idx, const int* edge_vertex_blk, const double* coeff_int, double* p_edge_out, - const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, + const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc); void verts2edges_scalar_lib_sp(const float* p_vertex_in, const int* edge_vertex_idx, const int* edge_vertex_blk, const float* coeff_int, float* p_edge_out, - const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, + const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc); - - void cells2edges_scalar_lib_dp(const double* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const double* coeff_int, double* p_edge_out, const int* i_startblk_in, + + void cells2edges_scalar_lib_dp(const double* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const double* coeff_int, double* 
p_edge_out, const int* i_startblk_in, const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, + const int nblk_c, const int nblks_e, const int patch_id, const bool l_limited_area, const bool lfill_latbc, const bool lacc); - void cells2edges_scalar_lib_sp(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const float* coeff_int, float* p_edge_out, const int* i_startblk_in, + void cells2edges_scalar_lib_sp(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const float* coeff_int, float* p_edge_out, const int* i_startblk_in, const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, + const int nblk_c, const int nblks_e, const int patch_id, const bool l_limited_area, const bool lfill_latbc, const bool lacc); - void cells2edges_scalar_lib_sp2dp(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const double* coeff_int, double* p_edge_out, const int* i_startblk_in, + void cells2edges_scalar_lib_sp2dp(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const double* coeff_int, double* p_edge_out, const int* i_startblk_in, const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, + const int nblk_c, const int nblks_e, const int patch_id, const bool l_limited_area, const bool lfill_latbc, const bool lacc); - void edges2verts_scalar_lib_dp (const double* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, - const double* v_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, 
const int slev, + void edges2verts_scalar_lib_dp (const double* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, + const double* v_int, double* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, const int nblks_v, const bool lacc); - void edges2verts_scalar_lib_sp (const float* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, - const float* v_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, + void edges2verts_scalar_lib_sp (const float* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, + const float* v_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, const int nblks_v, const bool lacc); void edges2cells_scalar_lib_dp( const double* p_edge_in, const int* edge_idx, const int* edge_blk, const double* coeff_int, double* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, const int nblks_c, const bool lacc); void edges2cells_scalar_lib_sp( const float* p_edge_in, const int* edge_idx, const int* edge_blk, const float* coeff_int, float* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, const int nblks_c, const bool lacc); @@ -107,22 +107,22 @@ extern "C"{ void cells2verts_scalar_lib_dp( 
const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async); void cells2verts_scalar_lib_dp2sp( const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async); void cells2verts_scalar_lib_sp( const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async); @@ -131,56 +131,56 @@ extern "C"{ void cells2verts_scalar_ri_lib_dp( const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async); void 
cells2verts_scalar_ri_lib_dp2sp( const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const double* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async); void cells2verts_scalar_ri_lib_sp( const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async); ///////////////////////////////////////////// void verts2cells_scalar_lib_dp( - const double* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const double* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, const double* coeff_int, double* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_v, const bool lacc); void verts2cells_scalar_lib_sp( - const float* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const float* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, const float* coeff_int, float* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_v, const bool lacc); ///////////////////////////////////////////// void 
cell_avg_lib_dp( - const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const double* avg_coeff, double* avg_psi_c, const int i_startblk, const int i_endblk, + const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const double* avg_coeff, double* avg_psi_c, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const bool lacc); void cell_avg_lib_sp( - const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const float* avg_coeff, float* avg_psi_c, const int i_startblk, const int i_endblk, + const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const float* avg_coeff, float* avg_psi_c, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const bool lacc); } diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index 26bd876..7f37448 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -29,13 +29,13 @@ /// template <typename T> void verts2edges_scalar_lib(const T* p_vertex_in, - const int* edge_vertex_idx, const int* edge_vertex_blk, + const int* edge_vertex_idx, const int* edge_vertex_blk, const T* coeff_int, T* p_edge_out, - const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, + const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, 
const int nblks_v, const int nblks_e, const bool lacc){ - + // Wrap raw pointers in unmanaged Kokkos Views. typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; @@ -46,7 +46,7 @@ void verts2edges_scalar_lib(const T* p_vertex_in, UnmanagedConstInt3D iblk_view (edge_vertex_blk, nproma,nblks_e, 4); UnmanagedConstT3D coeff_int_view (coeff_int, nproma,2,nblks_e); UnmanagedT3D p_edge_out_view (p_edge_out, nproma,nlev,nblks_e); - + for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { int i_startidx, i_endidx; @@ -54,17 +54,17 @@ void verts2edges_scalar_lib(const T* p_vertex_in, jb, i_startblk, i_endblk, i_startidx, i_endidx); - Kokkos::parallel_for("verts2edges_scalar", Kokkos::MDRangePolicy<Kokkos::Rank<2>> + Kokkos::parallel_for("verts2edges_scalar", Kokkos::MDRangePolicy<Kokkos::Rank<2>> ({slev, i_startidx}, {elev + 1, i_endidx + 1}), KOKKOS_LAMBDA(const int jk, const int je) { - - p_edge_out_view(je, jk, jb) = coeff_int_view(je, 0, jb) * - p_vertex_in_view(iidx_view(je, jb, 0), jk, iblk_view(je, jb, 0)) + + + p_edge_out_view(je, jk, jb) = coeff_int_view(je, 0, jb) * + p_vertex_in_view(iidx_view(je, jb, 0), jk, iblk_view(je, jb, 0)) + coeff_int_view(je, 1, jb)*p_vertex_in_view(iidx_view(je, jb, 1), jk, iblk_view(je, jb, 1)); - + }); Kokkos::fence(); - } + } } //------------------------------------------------------------------------ @@ -75,12 +75,12 @@ void verts2edges_scalar_lib(const T* p_vertex_in, /// velocity points. 
/// template <typename T, typename S> -void cells2edges_scalar_lib(const T* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const S* coeff_int, S* p_edge_out, const int* i_startblk_in, +void cells2edges_scalar_lib(const T* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const S* coeff_int, S* p_edge_out, const int* i_startblk_in, const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, - const bool l_limited_area, const bool lfill_latbc, const bool lacc){ + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc){ // Wrap raw pointers in unmanaged Kokkos Views. typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; @@ -93,7 +93,7 @@ void cells2edges_scalar_lib(const T* p_cell_in, const int* edge_cell_idx, const UnmanagedConstInt3D iblk_view (edge_cell_blk, nproma, nblks_e, 2); UnmanagedConstS3D coeff_int_view (coeff_int, nproma, 2, nblks_e); UnmanagedS3D p_edge_out_view (p_edge_out, nproma, nlev, nblks_e); - + //Fill outermost nest boundary int i_startblk, i_endblk; if ((l_limited_area || patch_id > 0) && (lfill_latbc)){ @@ -103,13 +103,13 @@ void cells2edges_scalar_lib(const T* p_cell_in, const int* edge_cell_idx, const for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { int i_startidx, i_endidx; - get_indices_e_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + get_indices_e_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); - Kokkos::parallel_for("cells2edges_scalar", Kokkos::MDRangePolicy<Kokkos::Rank<2>> + Kokkos::parallel_for("cells2edges_scalar", Kokkos::MDRangePolicy<Kokkos::Rank<2>> ({slev, i_startidx}, {elev + 1, i_endidx + 1}), KOKKOS_LAMBDA(const int jk, const int je) { - + if (iidx_view(je, jb, 0) >= 0 && iblk_view(je, jb, 
0) >= 0){ p_edge_out_view(je, jk, jb) = p_cell_in_view(iidx_view(je, jb, 0), jk, iblk_view(je, jb, 0)); } @@ -119,7 +119,7 @@ void cells2edges_scalar_lib(const T* p_cell_in, const int* edge_cell_idx, const else{ std::cerr << "mo_interpolation:cells2edges_scalar_lib: error in lateral boundary filling" << std::endl; std::exit(EXIT_FAILURE); - } + } }); Kokkos::fence(); } @@ -132,20 +132,20 @@ void cells2edges_scalar_lib(const T* p_cell_in, const int* edge_cell_idx, const for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { int i_startidx, i_endidx; - get_indices_e_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + get_indices_e_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); - Kokkos::parallel_for("cells2edges_scalar", Kokkos::MDRangePolicy<Kokkos::Rank<2>> + Kokkos::parallel_for("cells2edges_scalar", Kokkos::MDRangePolicy<Kokkos::Rank<2>> ({slev, i_startidx}, {elev + 1, i_endidx + 1}), KOKKOS_LAMBDA(const int jk, const int je) { - - p_edge_out_view(je, jk, jb) = coeff_int_view(je, 0, jb) * + + p_edge_out_view(je, jk, jb) = coeff_int_view(je, 0, jb) * p_cell_in_view(iidx_view(je, jb, 0), jk, iblk_view(je, jb, 0)) + coeff_int_view(je, 1, jb) * p_cell_in_view(iidx_view(je, jb, 1), jk, iblk_view(je, jb, 1)); - + }); Kokkos::fence(); - } + } } } @@ -158,9 +158,9 @@ void cells2edges_scalar_lib(const T* p_cell_in, const int* edge_cell_idx, const /// centers of dual faces. 
/// template <typename T> -void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, - const T* v_int, T* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, +void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, + const T* v_int, T* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, const int nblks_v, const bool lacc){ @@ -174,7 +174,7 @@ void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const UnmanagedConstInt3D iblk_view (vert_edge_blk, nproma,nblks_v, 6); UnmanagedConstT3D v_int_view (v_int, nproma, 6, nblks_v); UnmanagedT3D p_vert_out_view (p_vert_out, nproma, nlev, nblks_v); - + for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { int i_startidx, i_endidx; @@ -182,16 +182,16 @@ void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const jb, i_startblk, i_endblk, i_startidx, i_endidx); - Kokkos::parallel_for("edges2verts_scalar", Kokkos::MDRangePolicy<Kokkos::Rank<2>> + Kokkos::parallel_for("edges2verts_scalar", Kokkos::MDRangePolicy<Kokkos::Rank<2>> ({slev, i_startidx}, {elev + 1, i_endidx + 1}), KOKKOS_LAMBDA(const int jk, const int jv) { - - p_vert_out_view(jv, jk, jb) = v_int_view(jv, 0, jb)*p_edge_in_view(iidx_view(jv, jb, 0), jk, iblk_view(jv, jb, 0)) - + v_int_view(jv, 1, jb)*p_edge_in_view(iidx_view(jv, jb, 1), jk, iblk_view(jv, jb, 1)) - + v_int_view(jv, 2, jb)*p_edge_in_view(iidx_view(jv, jb, 2), jk, iblk_view(jv, jb, 2)) - + v_int_view(jv, 3, jb)*p_edge_in_view(iidx_view(jv, jb, 3), jk, iblk_view(jv, jb, 3)) - + v_int_view(jv, 4, jb)*p_edge_in_view(iidx_view(jv, jb, 4), jk, iblk_view(jv, jb, 4)) - + v_int_view(jv, 5, jb)*p_edge_in_view(iidx_view(jv, jb, 5), jk, iblk_view(jv, jb, 5)); + + p_vert_out_view(jv, jk, 
jb) = v_int_view(jv, 0, jb)*p_edge_in_view(iidx_view(jv, jb, 0), jk, iblk_view(jv, jb, 0)) + + v_int_view(jv, 1, jb)*p_edge_in_view(iidx_view(jv, jb, 1), jk, iblk_view(jv, jb, 1)) + + v_int_view(jv, 2, jb)*p_edge_in_view(iidx_view(jv, jb, 2), jk, iblk_view(jv, jb, 2)) + + v_int_view(jv, 3, jb)*p_edge_in_view(iidx_view(jv, jb, 3), jk, iblk_view(jv, jb, 3)) + + v_int_view(jv, 4, jb)*p_edge_in_view(iidx_view(jv, jb, 4), jk, iblk_view(jv, jb, 4)) + + v_int_view(jv, 5, jb)*p_edge_in_view(iidx_view(jv, jb, 5), jk, iblk_view(jv, jb, 5)); }); Kokkos::fence(); } @@ -209,8 +209,8 @@ template <typename T> void edges2cells_scalar_lib( const T* p_edge_in, const int* edge_idx, const int* edge_blk, const T* coeff_int, T* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, const int nblks_c, const bool lacc) { @@ -262,8 +262,8 @@ template <typename T, typename S> void cells2verts_scalar_lib( const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const S* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async) { @@ -297,7 +297,7 @@ void cells2verts_scalar_lib( Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( {slev, i_startidx}, {elev + 1, i_endidx + 1}); - + Kokkos::parallel_for("cells2verts_scalar_lib", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jv) { @@ -322,7 +322,7 @@ template <typename T, typename S> void cells2verts_scalar_ri_lib( const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const 
T* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async) @@ -360,7 +360,7 @@ void cells2verts_scalar_ri_lib( Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( {slev, i_startidx}, {elev + 1, i_endidx + 1}); - + Kokkos::parallel_for("cells2verts_scalar_ri_lib", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jv) { @@ -387,9 +387,9 @@ void cells2verts_scalar_ri_lib( /// template <typename T> void verts2cells_scalar_lib( - const T* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const T* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, const T* coeff_int, T* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_v, const bool lacc) { // Wrap raw pointers in unmanaged Kokkos Views. 
@@ -447,8 +447,8 @@ void verts2cells_scalar_lib( /// template <typename T> void cell_avg_lib( - const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const T* avg_coeff, T* avg_psi_c, const int i_startblk, const int i_endblk, + const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const T* avg_coeff, T* avg_psi_c, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const bool lacc) @@ -482,8 +482,8 @@ void cell_avg_lib( Kokkos::parallel_for("cell_avg_lib_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { // calculate the weighted average - - avg_psi_c_view(jc, jk, jb) = + + avg_psi_c_view(jc, jk, jb) = psi_c_view(jc, jk, jb)*avg_coeff_view(jc, 0, jb) + psi_c_view(iidx_view(jc, jb, 0), jk, iblk_view(jc, jb, 0))*avg_coeff_view(jc, 1, jb) + psi_c_view(iidx_view(jc, jb, 1), jk, iblk_view(jc, jb, 1))*avg_coeff_view(jc, 2, jb) + @@ -503,8 +503,8 @@ void verts2edges_scalar_lib<double>(const double* p_vertex_in, const int* edge_vertex_idx, const int* edge_vertex_blk, const double* coeff_int, double* p_edge_out, - const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, + const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc); template @@ -512,46 +512,46 @@ void verts2edges_scalar_lib<float>(const float* p_vertex_in, const int* edge_vertex_idx, const int* edge_vertex_blk, const float* coeff_int, float* p_edge_out, - const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, + const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int 
nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc); template -void cells2edges_scalar_lib<double, double>(const double* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const double* coeff_int, double* p_edge_out, const int* i_startblk_in, +void cells2edges_scalar_lib<double, double>(const double* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const double* coeff_int, double* p_edge_out, const int* i_startblk_in, const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, + const int nblk_c, const int nblks_e, const int patch_id, const bool l_limited_area, const bool lfill_latbc, const bool lacc); template -void cells2edges_scalar_lib<float, float>(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const float* coeff_int, float* p_edge_out, const int* i_startblk_in, +void cells2edges_scalar_lib<float, float>(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const float* coeff_int, float* p_edge_out, const int* i_startblk_in, const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, + const int nblk_c, const int nblks_e, const int patch_id, const bool l_limited_area, const bool lfill_latbc, const bool lacc); template // sp2dp -void cells2edges_scalar_lib<float, double>(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const double* coeff_int, double* p_edge_out, const int* i_startblk_in, +void cells2edges_scalar_lib<float, double>(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const double* coeff_int, double* p_edge_out, const int* i_startblk_in, const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, 
const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, + const int nblk_c, const int nblks_e, const int patch_id, const bool l_limited_area, const bool lfill_latbc, const bool lacc); template -void edges2verts_scalar_lib<double> (const double* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, - const double* v_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, +void edges2verts_scalar_lib<double> (const double* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, + const double* v_int, double* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, const int nblks_v, const bool lacc); template -void edges2verts_scalar_lib <float> (const float* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, - const float* v_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, +void edges2verts_scalar_lib <float> (const float* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, + const float* v_int, float* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, const int nblks_v, const bool lacc); @@ -560,8 +560,8 @@ template void edges2cells_scalar_lib<double>( const double* p_edge_in, const int* edge_idx, const int* edge_blk, const double* coeff_int, double* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, 
const int nblks_c, const bool lacc); @@ -569,98 +569,98 @@ template void edges2cells_scalar_lib<float>( const float* p_edge_in, const int* edge_idx, const int* edge_blk, const float* coeff_int, float* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, const int nblks_c, const bool lacc); -template +template void cells2verts_scalar_lib<double,double>( const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async); -template +template void cells2verts_scalar_lib<float,double>( const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async); -template +template void cells2verts_scalar_lib<float,float>( const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int 
nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async); -template +template void cells2verts_scalar_ri_lib<double,double>( const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async); -template +template void cells2verts_scalar_ri_lib<double,float>( const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const double* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async); -template +template void cells2verts_scalar_ri_lib<float,float>( const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async); -template +template void verts2cells_scalar_lib<double>( - const double* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const double* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, const double* coeff_int, double* p_cell_out, const int nblks_c, const int npromz_c, - const int 
slev, const int elev, const int nproma, const int nlev, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_v, const bool lacc); -template +template void verts2cells_scalar_lib<float>( - const float* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, + const float* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, const float* coeff_int, float* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_v, const bool lacc); template void cell_avg_lib<double>( - const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const double* avg_coeff, double* avg_psi_c, const int i_startblk, const int i_endblk, + const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const double* avg_coeff, double* avg_psi_c, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const bool lacc); - + template void cell_avg_lib<float>( - const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const float* avg_coeff, float* avg_psi_c, const int i_startblk, const int i_endblk, + const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const float* avg_coeff, float* avg_psi_c, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const bool lacc); diff --git a/src/interpolation/mo_lib_interpolation_scalar.hpp b/src/interpolation/mo_lib_interpolation_scalar.hpp index 
78f7df4..85c8fc5 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.hpp +++ b/src/interpolation/mo_lib_interpolation_scalar.hpp @@ -13,62 +13,62 @@ template <typename T> void verts2edges_scalar_lib(const T* p_vertex_in, - const int* edge_vertex_idx, const int* edge_vertex_blk, + const int* edge_vertex_idx, const int* edge_vertex_blk, const T* coeff_int, T* p_edge_out, - const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, + const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc); ; template <typename T, typename S> -void cells2edges_scalar_lib(const T* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const S* coeff_int, S* p_edge_out, const int* i_startblk_in, +void cells2edges_scalar_lib(const T* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, + const S* coeff_int, S* p_edge_out, const int* i_startblk_in, const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, + const int nblk_c, const int nblks_e, const int patch_id, const bool l_limited_area, const bool lfill_latbc, const bool lacc); template <typename T> -void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, - const T* v_int, T* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, +void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, + const T* v_int, T* p_vert_out, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, const int 
nblks_v, const bool lacc); template <typename T> void edges2cells_scalar_lib(const T* p_edge_in, const int* edge_idx, const int* edge_blk, const T* coeff_int, T* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_e, const int nblks_c, const bool lacc); template <typename T, typename S> void cells2verts_scalar_lib(const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const S* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async); template <typename T, typename S> void cells2verts_scalar_ri_lib(const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const T* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async); template <typename T> -void verts2cells_scalar_lib(const T* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, +void verts2cells_scalar_lib(const T* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, const T* coeff_int, T* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, + const int slev, const int elev, const int nproma, const int nlev, const int nblks_v, const bool lacc); template 
<typename T> -void cell_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const T* avg_coeff, T* avg_psi_c, const int i_startblk, const int i_endblk, +void cell_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, + const T* avg_coeff, T* avg_psi_c, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const bool lacc); diff --git a/src/interpolation/mo_lib_interpolation_vector.cpp b/src/interpolation/mo_lib_interpolation_vector.cpp index 74c2be7..fa1ed32 100644 --- a/src/interpolation/mo_lib_interpolation_vector.cpp +++ b/src/interpolation/mo_lib_interpolation_vector.cpp @@ -31,7 +31,7 @@ void edges2cells_vector_lib( typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - + UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e); UnmanagedConstT3D p_vt_in_view(p_vt_in, nproma, nlev, nblks_e); @@ -90,7 +90,7 @@ void edges2cells_vector_lib( } } -template +template void edges2cells_vector_lib<double>( const double* p_vn_in, const double* p_vt_in, const int* cell_edge_idx, const int* cell_edge_blk, diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp index 6338a36..606d1e9 100644 --- a/test/c/test_interpolation_scalar.cpp +++ b/test/c/test_interpolation_scalar.cpp @@ -181,7 +181,7 @@ TYPED_TEST(InterpolationScalarTypedTestFixture, Verts2Edges) { this->nblks_e, this->lacc); - // Check the outputs only for blocks in the range + // Check the outputs only for blocks in the range // { [i_startblk, i_endblk], [slev,elev], [i_startidx, i_endidx] } for (int block = this->i_startblk; block <= this->i_endblk; ++block) { for (int level = this->slev; level < this->elev; ++level) { @@ -222,7 +222,7 @@ 
TYPED_TEST(InterpolationScalarTypedTestFixture, Edges2Verts) { this->nblks_v, this->lacc); - // Check the outputs only for blocks in the range + // Check the outputs only for blocks in the range // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } for (int block = this->i_startblk; block <= this->i_endblk; ++block) { for (int level = this->slev; level < this->elev; ++level) { @@ -263,7 +263,7 @@ TYPED_TEST(InterpolationScalarTypedTestFixture, Edges2Cells) { this->nblks_c, this->lacc); - // Check the outputs only for blocks in the range + // Check the outputs only for blocks in the range // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } for (int block = this->i_startblk; block <= this->i_endblk; ++block) { for (int level = this->slev; level < this->elev; ++level) { @@ -294,9 +294,9 @@ TYPED_TEST(InterpolationScalarTypedTestFixture, Verts2Cells) { this->nlev, this->nblks_v, this->lacc); - - // Check the outputs only for blocks in the range + + // Check the outputs only for blocks in the range // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } for (int block = this->i_startblk; block <= this->i_endblk; ++block) { for (int level = this->slev; level < this->elev; ++level) { @@ -336,9 +336,9 @@ TYPED_TEST(InterpolationScalarTypedTestFixture, AvgLib) { this->nlev, this->nblks_c, this->lacc); - - // Check the outputs only for blocks in the range + + // Check the outputs only for blocks in the range // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } for (int block = this->i_startblk; block <= this->i_endblk; ++block) { for (int level = this->slev; level < this->elev; ++level) { @@ -439,7 +439,7 @@ TYPED_TEST(InterpolationScalarMixedTestFixture, cells2edges) { this->lfill_latbc, this->lacc); - // Check the outputs only for blocks in the range + // Check the outputs only for blocks in the range // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } for (int block = 
this->i_startblk; block <= this->i_endblk; ++block) { for (int level = this->slev; level < this->elev; ++level) { @@ -483,7 +483,7 @@ TYPED_TEST(InterpolationScalarMixedTestFixture, cells2verts) { this->lacc, this->acc_async); - // Check the outputs only for blocks in the range + // Check the outputs only for blocks in the range // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } for (int block = this->i_startblk; block <= this->i_endblk; ++block) { for (int level = this->slev; level < this->elev; ++level) { @@ -559,7 +559,7 @@ TYPED_TEST(Cells2vertsriScalarLibTestFixture, cells2verts_ri) { this->lacc, this->acc_async); - // Check the outputs only for blocks in the range + // Check the outputs only for blocks in the range // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } for (int block = this->i_startblk; block <= this->i_endblk; ++block) { for (int level = this->slev; level < this->elev; ++level) { -- GitLab From 8eecaebb14a552b5d968996bf7da67b5d4bf2da0 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Wed, 5 Mar 2025 17:09:48 +0100 Subject: [PATCH 33/36] added some comments in the binding file --- src/interpolation/interpolation_bindings.cpp | 32 +++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp index fc39ccd..dda576c 100644 --- a/src/interpolation/interpolation_bindings.cpp +++ b/src/interpolation/interpolation_bindings.cpp @@ -13,7 +13,7 @@ #include "mo_lib_interpolation_vector.hpp" #include "mo_lib_interpolation_scalar.hpp" -//mo_lib_interpolation_vector.F90 +// This is the binding for mo_interpolation_vector::edges2cells_vector_lib (wp=dp) void edges2cells_vector_lib_dp( const double* p_vn_in, const double* p_vt_in, const int* cell_edge_idx, const int* cell_edge_blk, @@ -36,6 +36,7 @@ void edges2cells_vector_lib_dp( nlev, nblks_e, nblks_c); } +// This is the binding for 
mo_interpolation_vector::edges2cells_vector_lib (wp=sp) void edges2cells_vector_lib_sp( const float* p_vn_in, const float* p_vt_in, const int* cell_edge_idx, const int* cell_edge_blk, @@ -58,7 +59,7 @@ void edges2cells_vector_lib_sp( nlev, nblks_e, nblks_c); } -//mo_lib_interpolation_scalar.F90 +// This is the binding for mo_interpolation_scalar::verts2edges_scalar_lib (wp=dp) void verts2edges_scalar_lib_dp(const double* p_vertex_in, const int* edge_vertex_idx, const int* edge_vertex_blk, const double* coeff_int, @@ -76,6 +77,7 @@ void verts2edges_scalar_lib_dp(const double* p_vertex_in, } +// This is the binding for mo_interpolation_scalar::verts2edges_scalar_lib (wp=sp) void verts2edges_scalar_lib_sp(const float* p_vertex_in, const int* edge_vertex_idx, const int* edge_vertex_blk, const float* coeff_int, @@ -93,6 +95,7 @@ void verts2edges_scalar_lib_sp(const float* p_vertex_in, } +// This is the binding for mo_interpolation_scalar::cells2edges_scalar_dp_lib void cells2edges_scalar_lib_dp(const double* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, const double* coeff_int, double* p_edge_out, const int* i_startblk_in, const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, @@ -106,7 +109,7 @@ void cells2edges_scalar_lib_dp(const double* p_cell_in, const int* edge_cell_idx } - +// This is the binding for mo_interpolation_scalar::cells2edges_scalar_sp_lib void cells2edges_scalar_lib_sp(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, const float* coeff_int, float* p_edge_out, const int* i_startblk_in, const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, @@ -120,6 +123,7 @@ void cells2edges_scalar_lib_sp(const float* p_cell_in, const int* edge_cell_idx, } +// This is the binding for mo_interpolation_scalar::cells2edges_scalar_sp2dp_lib void cells2edges_scalar_lib_sp2dp(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, const double* coeff_int, double* 
p_edge_out, const int* i_startblk_in, const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, @@ -133,6 +137,7 @@ void cells2edges_scalar_lib_sp2dp(const float* p_cell_in, const int* edge_cell_i } +// This is the binding for mo_interpolation_scalar::edges2verts_scalar_lib (wp=dp) void edges2verts_scalar_lib_dp (const double* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, const double* v_int, double* p_vert_out, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, @@ -143,6 +148,7 @@ void edges2verts_scalar_lib_dp (const double* p_edge_in, const int* vert_edge_id i_endblk,i_startidx_in,i_endidx_in, slev, elev, nproma, nlev,nblks_e, nblks_v, lacc); } +// This is the binding for mo_interpolation_scalar::edges2verts_scalar_lib (wp=sp) void edges2verts_scalar_lib_sp(const float* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, const float* v_int, float* p_vert_out, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, @@ -154,6 +160,7 @@ void edges2verts_scalar_lib_sp(const float* p_edge_in, const int* vert_edge_idx, } +// This is the binding for mo_interpolation_scalar::edges2cells_scalar_dp_lib void edges2cells_scalar_lib_dp( const double* p_edge_in, const int* edge_idx, const int* edge_blk, const double* coeff_int, double* p_cell_out, const int i_startblk, const int i_endblk, @@ -171,6 +178,7 @@ void edges2cells_scalar_lib_dp( lacc); } +// This is the binding for mo_interpolation_scalar::edges2cells_scalar_sp_lib void edges2cells_scalar_lib_sp( const float* p_edge_in, const int* edge_idx, const int* edge_blk, const float* coeff_int, float* p_cell_out, const int i_startblk, const int i_endblk, @@ -189,8 +197,7 @@ void edges2cells_scalar_lib_sp( lacc); } -///////////////////////////////////////////// - +// This is the binding for mo_interpolation_scalar::cells2verts_scalar_dp_lib void cells2verts_scalar_lib_dp( const 
double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, @@ -208,6 +215,7 @@ void cells2verts_scalar_lib_dp( lacc, acc_async); } +// This is the binding for mo_interpolation_scalar::cells2verts_scalar_dp2sp_lib void cells2verts_scalar_lib_dp2sp( const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, @@ -225,6 +233,7 @@ void cells2verts_scalar_lib_dp2sp( lacc, acc_async); } +// This is the binding for mo_interpolation_scalar::cells2verts_scalar_sp_lib void cells2verts_scalar_lib_sp( const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, @@ -242,8 +251,7 @@ void cells2verts_scalar_lib_sp( lacc, acc_async); } -///////////////////////////////////////////// - +// This is the binding for mo_interpolation_scalar::cells2verts_scalar_ri_lib (wp=dp, vp=dp) void cells2verts_scalar_ri_lib_dp( const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, @@ -261,6 +269,7 @@ void cells2verts_scalar_ri_lib_dp( lacc, acc_async); } +// This is the binding for mo_interpolation_scalar::cells2verts_scalar_ri_lib (wp=dp, vp=sp) void cells2verts_scalar_ri_lib_dp2sp( const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const double* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, @@ -278,6 +287,7 @@ void cells2verts_scalar_ri_lib_dp2sp( lacc, acc_async); } +// This is the binding for mo_interpolation_scalar::cells2verts_scalar_ri_lib (wp=sp, vp=sp) void cells2verts_scalar_ri_lib_sp( const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, const float* coeff_int, float* p_vert_out, const int i_startblk, const int 
i_endblk, @@ -295,8 +305,7 @@ void cells2verts_scalar_ri_lib_sp( lacc, acc_async); } -///////////////////////////////////////////// - +// This is the binding for mo_interpolation_scalar::verts2cells_scalar_lib (wp=dp) void verts2cells_scalar_lib_dp( const double* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, const double* coeff_int, double* p_cell_out, const int nblks_c, const int npromz_c, @@ -310,6 +319,7 @@ void verts2cells_scalar_lib_dp( nblks_v, lacc); } +// This is the binding for mo_interpolation_scalar::verts2cells_scalar_lib (wp=sp) void verts2cells_scalar_lib_sp( const float* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, const float* coeff_int, float* p_cell_out, const int nblks_c, const int npromz_c, @@ -323,8 +333,7 @@ void verts2cells_scalar_lib_sp( nblks_v, lacc); } -///////////////////////////////////////////// - +// This is the binding for mo_interpolation_scalar::cell_avg_lib (wp=dp) void cell_avg_lib_dp( const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, const double* avg_coeff, double* avg_psi_c, const int i_startblk, const int i_endblk, @@ -342,6 +351,7 @@ void cell_avg_lib_dp( lacc); } +// This is the binding for mo_interpolation_scalar::cell_avg_lib (wp=sp) void cell_avg_lib_sp( const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, const float* avg_coeff, float* avg_psi_c, const int i_startblk, const int i_endblk, -- GitLab From 60308a94a76d9669319856a53309b9688f345e62 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Wed, 5 Mar 2025 17:35:41 +0100 Subject: [PATCH 34/36] formatted the cpp code in interpolation_scalar_lib a little --- .../mo_lib_interpolation_scalar.cpp | 103 ++++++++++-------- 1 file changed, 58 insertions(+), 45 deletions(-) diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index 7f37448..b2b51ac 100644 --- 
a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -54,15 +54,17 @@ void verts2edges_scalar_lib(const T* p_vertex_in, jb, i_startblk, i_endblk, i_startidx, i_endidx); - Kokkos::parallel_for("verts2edges_scalar", Kokkos::MDRangePolicy<Kokkos::Rank<2>> - ({slev, i_startidx}, {elev + 1, i_endidx + 1}), - KOKKOS_LAMBDA(const int jk, const int je) { + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for("verts2edges_scalar", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int je) { p_edge_out_view(je, jk, jb) = coeff_int_view(je, 0, jb) * p_vertex_in_view(iidx_view(je, jb, 0), jk, iblk_view(je, jb, 0)) + coeff_int_view(je, 1, jb)*p_vertex_in_view(iidx_view(je, jb, 1), jk, iblk_view(je, jb, 1)); - - }); + } + ); Kokkos::fence(); } } @@ -106,21 +108,24 @@ void cells2edges_scalar_lib(const T* p_cell_in, const int* edge_cell_idx, const get_indices_e_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); - Kokkos::parallel_for("cells2edges_scalar", Kokkos::MDRangePolicy<Kokkos::Rank<2>> - ({slev, i_startidx}, {elev + 1, i_endidx + 1}), - KOKKOS_LAMBDA(const int jk, const int je) { - - if (iidx_view(je, jb, 0) >= 0 && iblk_view(je, jb, 0) >= 0){ - p_edge_out_view(je, jk, jb) = p_cell_in_view(iidx_view(je, jb, 0), jk, iblk_view(je, jb, 0)); - } - else if (iidx_view(je, jb, 1) >= 0 && iblk_view(je, jb, 1) >= 0){ - p_edge_out_view(je, jk, jb) = p_cell_in_view(iidx_view(je, jb, 1), jk, iblk_view(je, jb, 1)); + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for("cells2edges_scalar", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int je) { + + if (iidx_view(je, jb, 0) >= 0 && iblk_view(je, jb, 0) >= 0){ + p_edge_out_view(je, jk, jb) = p_cell_in_view(iidx_view(je, jb, 0), jk, iblk_view(je, jb, 0)); + } + else if (iidx_view(je, jb, 1) >= 
0 && iblk_view(je, jb, 1) >= 0){ + p_edge_out_view(je, jk, jb) = p_cell_in_view(iidx_view(je, jb, 1), jk, iblk_view(je, jb, 1)); + } + else{ + std::cerr << "mo_interpolation:cells2edges_scalar_lib: error in lateral boundary filling" << std::endl; + std::exit(EXIT_FAILURE); + } } - else{ - std::cerr << "mo_interpolation:cells2edges_scalar_lib: error in lateral boundary filling" << std::endl; - std::exit(EXIT_FAILURE); - } - }); + ); Kokkos::fence(); } } @@ -135,15 +140,16 @@ void cells2edges_scalar_lib(const T* p_cell_in, const int* edge_cell_idx, const get_indices_e_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); - Kokkos::parallel_for("cells2edges_scalar", Kokkos::MDRangePolicy<Kokkos::Rank<2>> - ({slev, i_startidx}, {elev + 1, i_endidx + 1}), - KOKKOS_LAMBDA(const int jk, const int je) { + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + Kokkos::parallel_for("cells2edges_scalar", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int je) { p_edge_out_view(je, jk, jb) = coeff_int_view(je, 0, jb) * p_cell_in_view(iidx_view(je, jb, 0), jk, iblk_view(je, jb, 0)) + coeff_int_view(je, 1, jb) * p_cell_in_view(iidx_view(je, jb, 1), jk, iblk_view(je, jb, 1)); - - }); + } + ); Kokkos::fence(); } } @@ -182,17 +188,19 @@ void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const jb, i_startblk, i_endblk, i_startidx, i_endidx); - Kokkos::parallel_for("edges2verts_scalar", Kokkos::MDRangePolicy<Kokkos::Rank<2>> - ({slev, i_startidx}, {elev + 1, i_endidx + 1}), - KOKKOS_LAMBDA(const int jk, const int jv) { - - p_vert_out_view(jv, jk, jb) = v_int_view(jv, 0, jb)*p_edge_in_view(iidx_view(jv, jb, 0), jk, iblk_view(jv, jb, 0)) - + v_int_view(jv, 1, jb)*p_edge_in_view(iidx_view(jv, jb, 1), jk, iblk_view(jv, jb, 1)) - + v_int_view(jv, 2, jb)*p_edge_in_view(iidx_view(jv, jb, 2), jk, iblk_view(jv, jb, 2)) - + v_int_view(jv, 3, jb)*p_edge_in_view(iidx_view(jv, jb, 3), 
jk, iblk_view(jv, jb, 3)) - + v_int_view(jv, 4, jb)*p_edge_in_view(iidx_view(jv, jb, 4), jk, iblk_view(jv, jb, 4)) - + v_int_view(jv, 5, jb)*p_edge_in_view(iidx_view(jv, jb, 5), jk, iblk_view(jv, jb, 5)); - }); + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for("edges2verts_scalar", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jv) { + p_vert_out_view(jv, jk, jb) = v_int_view(jv, 0, jb)*p_edge_in_view(iidx_view(jv, jb, 0), jk, iblk_view(jv, jb, 0)) + + v_int_view(jv, 1, jb)*p_edge_in_view(iidx_view(jv, jb, 1), jk, iblk_view(jv, jb, 1)) + + v_int_view(jv, 2, jb)*p_edge_in_view(iidx_view(jv, jb, 2), jk, iblk_view(jv, jb, 2)) + + v_int_view(jv, 3, jb)*p_edge_in_view(iidx_view(jv, jb, 3), jk, iblk_view(jv, jb, 3)) + + v_int_view(jv, 4, jb)*p_edge_in_view(iidx_view(jv, jb, 4), jk, iblk_view(jv, jb, 4)) + + v_int_view(jv, 5, jb)*p_edge_in_view(iidx_view(jv, jb, 5), jk, iblk_view(jv, jb, 5)); + } + ); Kokkos::fence(); } @@ -245,11 +253,12 @@ void edges2cells_scalar_lib( Kokkos::parallel_for("edges2cells_scalar_lib_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - p_cell_out_view(jc, jk, jb) = - coeff_int_view(jc, 0, jb)*p_edge_in_view(iidx_view(jc, jb, 0), jk, iblk_view(jc, jb, 0)) + - coeff_int_view(jc, 1, jb)*p_edge_in_view(iidx_view(jc, jb, 1), jk, iblk_view(jc, jb, 1)) + - coeff_int_view(jc, 2, jb)*p_edge_in_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2)); - }); + p_cell_out_view(jc, jk, jb) = + coeff_int_view(jc, 0, jb)*p_edge_in_view(iidx_view(jc, jb, 0), jk, iblk_view(jc, jb, 0)) + + coeff_int_view(jc, 1, jb)*p_edge_in_view(iidx_view(jc, jb, 1), jk, iblk_view(jc, jb, 1)) + + coeff_int_view(jc, 2, jb)*p_edge_in_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2)); + } + ); } } @@ -309,7 +318,8 @@ void cells2verts_scalar_lib( coeff_int_view(jv, 4, jb)*p_cell_in_view(iidx_view(jv, jb, 4), jk, iblk_view(jv, jb, 4)) + coeff_int_view(jv, 5, 
jb)*p_cell_in_view(iidx_view(jv, jb, 5), jk, iblk_view(jv, jb, 5)); - }); + } + ); } } @@ -376,7 +386,8 @@ void cells2verts_scalar_ri_lib( coeff_int_view(jv, 4, jb)*p_cell_in_view(iidx_view(jv, jb, 4), jk, iblk_view(jv, jb, 4)) + coeff_int_view(jv, 5, jb)*p_cell_in_view(iidx_view(jv, jb, 5), jk, iblk_view(jv, jb, 5)); - }); + } + ); } } @@ -429,7 +440,8 @@ void verts2cells_scalar_lib( coeff_int_view(jc, 1, jb)*p_vert_in_view(iidx_view(jc, jb, 1), jk, iblk_view(jc, jb, 1)) + coeff_int_view(jc, 2, jb)*p_vert_in_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2)); - }); + } + ); } } @@ -488,8 +500,9 @@ void cell_avg_lib( psi_c_view(iidx_view(jc, jb, 0), jk, iblk_view(jc, jb, 0))*avg_coeff_view(jc, 1, jb) + psi_c_view(iidx_view(jc, jb, 1), jk, iblk_view(jc, jb, 1))*avg_coeff_view(jc, 2, jb) + psi_c_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2))*avg_coeff_view(jc, 3, jb); - }); - } + } + ); + } } //----------------------------------------------------------------------- -- GitLab From 853b97cec3f0240b932f9a3dbcb7d132ab9d204c Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Wed, 5 Mar 2025 17:37:31 +0100 Subject: [PATCH 35/36] added the missing kokkos::fence --- src/interpolation/mo_lib_interpolation_scalar.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index b2b51ac..386ce0c 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -259,6 +259,7 @@ void edges2cells_scalar_lib( coeff_int_view(jc, 2, jb)*p_edge_in_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2)); } ); + Kokkos::fence(); } } @@ -320,6 +321,7 @@ void cells2verts_scalar_lib( } ); + Kokkos::fence(); } } @@ -388,6 +390,7 @@ void cells2verts_scalar_ri_lib( } ); + Kokkos::fence(); } } @@ -442,6 +445,7 @@ void verts2cells_scalar_lib( } ); + Kokkos::fence(); } } @@ -502,6 +506,7 @@ void cell_avg_lib( 
psi_c_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2))*avg_coeff_view(jc, 3, jb); } ); + Kokkos::fence(); } } -- GitLab From baab213a5a00d74a58c42696760e1fa8d8cef548 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Wed, 5 Mar 2025 17:57:07 +0100 Subject: [PATCH 36/36] formatted the cpp codes using clang-format --- src/interpolation/interpolation_bindings.cpp | 553 ++++---- src/interpolation/interpolation_bindings.h | 348 ++--- .../mo_lib_interpolation_scalar.cpp | 1169 +++++++++-------- .../mo_lib_interpolation_scalar.hpp | 94 +- .../mo_lib_interpolation_vector.cpp | 175 +-- .../mo_lib_interpolation_vector.hpp | 21 +- test/c/test_interpolation_scalar.cpp | 493 ++++--- test/c/test_interpolation_vector.cpp | 70 +- 8 files changed, 1456 insertions(+), 1467 deletions(-) diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp index dda576c..628f411 100644 --- a/src/interpolation/interpolation_bindings.cpp +++ b/src/interpolation/interpolation_bindings.cpp @@ -10,362 +10,319 @@ // --------------------------------------------------------------- #include "interpolation_bindings.h" -#include "mo_lib_interpolation_vector.hpp" #include "mo_lib_interpolation_scalar.hpp" +#include "mo_lib_interpolation_vector.hpp" -// This is the binding for mo_interpolation_vector::edges2cells_vector_lib (wp=dp) -void edges2cells_vector_lib_dp( - const double* p_vn_in, const double* p_vt_in, - const int* cell_edge_idx, const int* cell_edge_blk, - const double* e_bln_c_u, const double* e_bln_c_v, - double* p_u_out, double* p_v_out, - int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, - int slev, int elev, - int nproma, - int nlev, int nblks_e, int nblks_c){ - - edges2cells_vector_lib<double>(p_vn_in, p_vt_in, - cell_edge_idx, cell_edge_blk, - e_bln_c_u, e_bln_c_v, - p_u_out, p_v_out, - i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, - nproma, - nlev, nblks_e, nblks_c); +// This is the 
binding for mo_interpolation_vector::edges2cells_vector_lib +// (wp=dp) +void edges2cells_vector_lib_dp(const double *p_vn_in, const double *p_vt_in, + const int *cell_edge_idx, + const int *cell_edge_blk, + const double *e_bln_c_u, const double *e_bln_c_v, + double *p_u_out, double *p_v_out, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, + int nblks_e, int nblks_c) { + + edges2cells_vector_lib<double>( + p_vn_in, p_vt_in, cell_edge_idx, cell_edge_blk, e_bln_c_u, e_bln_c_v, + p_u_out, p_v_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, + elev, nproma, nlev, nblks_e, nblks_c); } -// This is the binding for mo_interpolation_vector::edges2cells_vector_lib (wp=sp) -void edges2cells_vector_lib_sp( - const float* p_vn_in, const float* p_vt_in, - const int* cell_edge_idx, const int* cell_edge_blk, - const float* e_bln_c_u, const float* e_bln_c_v, - float* p_u_out, float* p_v_out, - int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, - int slev, int elev, - int nproma, - int nlev, int nblks_e, int nblks_c){ - - edges2cells_vector_lib<float>(p_vn_in, p_vt_in, - cell_edge_idx, cell_edge_blk, - e_bln_c_u, e_bln_c_v, - p_u_out, p_v_out, - i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, - nproma, - nlev, nblks_e, nblks_c); +// This is the binding for mo_interpolation_vector::edges2cells_vector_lib +// (wp=sp) +void edges2cells_vector_lib_sp(const float *p_vn_in, const float *p_vt_in, + const int *cell_edge_idx, + const int *cell_edge_blk, const float *e_bln_c_u, + const float *e_bln_c_v, float *p_u_out, + float *p_v_out, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_e, + int nblks_c) { + + edges2cells_vector_lib<float>( + p_vn_in, p_vt_in, cell_edge_idx, cell_edge_blk, e_bln_c_u, e_bln_c_v, + p_u_out, p_v_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, + elev, nproma, nlev, 
nblks_e, nblks_c); } -// This is the binding for mo_interpolation_scalar::verts2edges_scalar_lib (wp=dp) -void verts2edges_scalar_lib_dp(const double* p_vertex_in, - const int* edge_vertex_idx, const int* edge_vertex_blk, - const double* coeff_int, - double* p_edge_out, - const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, - const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc){ - - verts2edges_scalar_lib<double>(p_vertex_in, - edge_vertex_idx, edge_vertex_blk, coeff_int, - p_edge_out, - i_startblk, i_endblk, - i_startidx_in, i_endidx_in, slev, elev, - nproma, nlev, nblks_v, nblks_e, lacc); - +// This is the binding for mo_interpolation_scalar::verts2edges_scalar_lib +// (wp=dp) +void verts2edges_scalar_lib_dp( + const double *p_vertex_in, const int *edge_vertex_idx, + const int *edge_vertex_blk, const double *coeff_int, double *p_edge_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const int nlev, const int nblks_v, const int nblks_e, const bool lacc) { + + verts2edges_scalar_lib<double>(p_vertex_in, edge_vertex_idx, edge_vertex_blk, + coeff_int, p_edge_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_v, nblks_e, lacc); } -// This is the binding for mo_interpolation_scalar::verts2edges_scalar_lib (wp=sp) -void verts2edges_scalar_lib_sp(const float* p_vertex_in, - const int* edge_vertex_idx, const int* edge_vertex_blk, - const float* coeff_int, - float* p_edge_out, - const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, - const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc){ - - verts2edges_scalar_lib<float>(p_vertex_in, - edge_vertex_idx, edge_vertex_blk, coeff_int, - p_edge_out, - i_startblk, i_endblk, - 
i_startidx_in, i_endidx_in, slev, elev, - nproma, nlev, nblks_v, nblks_e, lacc); - +// This is the binding for mo_interpolation_scalar::verts2edges_scalar_lib +// (wp=sp) +void verts2edges_scalar_lib_sp( + const float *p_vertex_in, const int *edge_vertex_idx, + const int *edge_vertex_blk, const float *coeff_int, float *p_edge_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const int nlev, const int nblks_v, const int nblks_e, const bool lacc) { + + verts2edges_scalar_lib<float>(p_vertex_in, edge_vertex_idx, edge_vertex_blk, + coeff_int, p_edge_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_v, nblks_e, lacc); } // This is the binding for mo_interpolation_scalar::cells2edges_scalar_dp_lib -void cells2edges_scalar_lib_dp(const double* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const double* coeff_int, double* p_edge_out, const int* i_startblk_in, - const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, - const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, - const bool l_limited_area, const bool lfill_latbc, const bool lacc){ - - cells2edges_scalar_lib<double, double>(p_cell_in,edge_cell_idx,edge_cell_blk,coeff_int,p_edge_out,i_startblk_in, - i_endblk_in,i_startidx_in,i_endidx_in,slev,elev,nproma,nlev, - nblk_c,nblks_e,patch_id,l_limited_area,lfill_latbc,lacc); - +void cells2edges_scalar_lib_dp( + const double *p_cell_in, const int *edge_cell_idx, const int *edge_cell_blk, + const double *coeff_int, double *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, const int *i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc) 
{ + + cells2edges_scalar_lib<double, double>( + p_cell_in, edge_cell_idx, edge_cell_blk, coeff_int, p_edge_out, + i_startblk_in, i_endblk_in, i_startidx_in, i_endidx_in, slev, elev, + nproma, nlev, nblk_c, nblks_e, patch_id, l_limited_area, lfill_latbc, + lacc); } // This is the binding for mo_interpolation_scalar::cells2edges_scalar_sp_lib -void cells2edges_scalar_lib_sp(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const float* coeff_int, float* p_edge_out, const int* i_startblk_in, - const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, - const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, - const bool l_limited_area, const bool lfill_latbc, const bool lacc){ - - cells2edges_scalar_lib<float, float>(p_cell_in,edge_cell_idx,edge_cell_blk,coeff_int,p_edge_out,i_startblk_in, - i_endblk_in,i_startidx_in,i_endidx_in,slev,elev,nproma,nlev, - nblk_c,nblks_e,patch_id,l_limited_area,lfill_latbc,lacc); - +void cells2edges_scalar_lib_sp(const float *p_cell_in, const int *edge_cell_idx, + const int *edge_cell_blk, const float *coeff_int, + float *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, + const int *i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, + const int patch_id, const bool l_limited_area, + const bool lfill_latbc, const bool lacc) { + + cells2edges_scalar_lib<float, float>( + p_cell_in, edge_cell_idx, edge_cell_blk, coeff_int, p_edge_out, + i_startblk_in, i_endblk_in, i_startidx_in, i_endidx_in, slev, elev, + nproma, nlev, nblk_c, nblks_e, patch_id, l_limited_area, lfill_latbc, + lacc); } // This is the binding for mo_interpolation_scalar::cells2edges_scalar_sp2dp_lib -void cells2edges_scalar_lib_sp2dp(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const double* coeff_int, double* p_edge_out, 
const int* i_startblk_in, - const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, - const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, - const bool l_limited_area, const bool lfill_latbc, const bool lacc){ - - cells2edges_scalar_lib<float, double>(p_cell_in,edge_cell_idx,edge_cell_blk,coeff_int,p_edge_out,i_startblk_in, - i_endblk_in,i_startidx_in,i_endidx_in,slev,elev,nproma,nlev, - nblk_c,nblks_e,patch_id,l_limited_area,lfill_latbc,lacc); - +void cells2edges_scalar_lib_sp2dp( + const float *p_cell_in, const int *edge_cell_idx, const int *edge_cell_blk, + const double *coeff_int, double *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, const int *i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc) { + + cells2edges_scalar_lib<float, double>( + p_cell_in, edge_cell_idx, edge_cell_blk, coeff_int, p_edge_out, + i_startblk_in, i_endblk_in, i_startidx_in, i_endidx_in, slev, elev, + nproma, nlev, nblk_c, nblks_e, patch_id, l_limited_area, lfill_latbc, + lacc); } -// This is the binding for mo_interpolation_scalar::edges2verts_scalar_lib (wp=dp) -void edges2verts_scalar_lib_dp (const double* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, - const double* v_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, - const int elev, const int nproma, const int nlev, const int nblks_e, - const int nblks_v, const bool lacc){ +// This is the binding for mo_interpolation_scalar::edges2verts_scalar_lib +// (wp=dp) +void edges2verts_scalar_lib_dp( + const double *p_edge_in, const int *vert_edge_idx, const int *vert_edge_blk, + const double *v_int, double *p_vert_out, const int i_startblk, + const int 
i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_v, const bool lacc) { - edges2verts_scalar_lib<double>(p_edge_in, vert_edge_idx, vert_edge_blk, v_int,p_vert_out,i_startblk, - i_endblk,i_startidx_in,i_endidx_in, slev, elev, nproma, nlev,nblks_e, nblks_v, lacc); + edges2verts_scalar_lib<double>(p_edge_in, vert_edge_idx, vert_edge_blk, v_int, + p_vert_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_e, nblks_v, lacc); } -// This is the binding for mo_interpolation_scalar::edges2verts_scalar_lib (wp=sp) -void edges2verts_scalar_lib_sp(const float* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, - const float* v_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, - const int elev, const int nproma, const int nlev, const int nblks_e, - const int nblks_v, const bool lacc){ - - edges2verts_scalar_lib<float>(p_edge_in, vert_edge_idx, vert_edge_blk, v_int,p_vert_out,i_startblk, - i_endblk,i_startidx_in,i_endidx_in, slev, elev, nproma, nlev,nblks_e, nblks_v, lacc); - +// This is the binding for mo_interpolation_scalar::edges2verts_scalar_lib +// (wp=sp) +void edges2verts_scalar_lib_sp(const float *p_edge_in, const int *vert_edge_idx, + const int *vert_edge_blk, const float *v_int, + float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_v, + const bool lacc) { + + edges2verts_scalar_lib<float>(p_edge_in, vert_edge_idx, vert_edge_blk, v_int, + p_vert_out, i_startblk, i_endblk, i_startidx_in, + i_endidx_in, slev, elev, nproma, nlev, nblks_e, + nblks_v, lacc); } // This is the binding for mo_interpolation_scalar::edges2cells_scalar_dp_lib -void edges2cells_scalar_lib_dp( 
- const double* p_edge_in, const int* edge_idx, const int* edge_blk, - const double* coeff_int, double* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_e, const int nblks_c, - const bool lacc) -{ - edges2cells_scalar_lib<double>( - p_edge_in, edge_idx, edge_blk, - coeff_int, p_cell_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_e, nblks_c, - lacc); +void edges2cells_scalar_lib_dp(const double *p_edge_in, const int *edge_idx, + const int *edge_blk, const double *coeff_int, + double *p_cell_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_c, + const bool lacc) { + edges2cells_scalar_lib<double>(p_edge_in, edge_idx, edge_blk, coeff_int, + p_cell_out, i_startblk, i_endblk, + i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_e, nblks_c, lacc); } // This is the binding for mo_interpolation_scalar::edges2cells_scalar_sp_lib -void edges2cells_scalar_lib_sp( - const float* p_edge_in, const int* edge_idx, const int* edge_blk, - const float* coeff_int, float* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_e, const int nblks_c, - const bool lacc) -{ - - edges2cells_scalar_lib<float>( - p_edge_in, edge_idx, edge_blk, - coeff_int, p_cell_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_e, nblks_c, - lacc); +void edges2cells_scalar_lib_sp(const float *p_edge_in, const int *edge_idx, + const int *edge_blk, const float *coeff_int, + float *p_cell_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const 
int slev, + const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_c, + const bool lacc) { + + edges2cells_scalar_lib<float>(p_edge_in, edge_idx, edge_blk, coeff_int, + p_cell_out, i_startblk, i_endblk, i_startidx_in, + i_endidx_in, slev, elev, nproma, nlev, nblks_e, + nblks_c, lacc); } // This is the binding for mo_interpolation_scalar::cells2verts_scalar_dp_lib void cells2verts_scalar_lib_dp( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async) -{ - cells2verts_scalar_lib<double,double>( - p_cell_in, vert_cell_idx, vert_cell_blk, - coeff_int, p_vert_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_c, nblks_v, - lacc, acc_async); + const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const double *coeff_int, double *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async) { + cells2verts_scalar_lib<double, double>( + p_cell_in, vert_cell_idx, vert_cell_blk, coeff_int, p_vert_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_c, nblks_v, lacc, acc_async); } // This is the binding for mo_interpolation_scalar::cells2verts_scalar_dp2sp_lib void cells2verts_scalar_lib_dp2sp( - const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const 
int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async) -{ - cells2verts_scalar_lib<float,double>( - p_cell_in, vert_cell_idx, vert_cell_blk, - coeff_int, p_vert_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_c, nblks_v, - lacc, acc_async); + const float *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const double *coeff_int, double *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async) { + cells2verts_scalar_lib<float, double>( + p_cell_in, vert_cell_idx, vert_cell_blk, coeff_int, p_vert_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_c, nblks_v, lacc, acc_async); } // This is the binding for mo_interpolation_scalar::cells2verts_scalar_sp_lib -void cells2verts_scalar_lib_sp( - const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async) -{ - cells2verts_scalar_lib<float,float>( - p_cell_in, vert_cell_idx, vert_cell_blk, - coeff_int, p_vert_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_c, nblks_v, - lacc, acc_async); +void cells2verts_scalar_lib_sp(const float *p_cell_in, const int *vert_cell_idx, + const int *vert_cell_blk, const float *coeff_int, + float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, + 
const bool lacc, const bool acc_async) { + cells2verts_scalar_lib<float, float>( + p_cell_in, vert_cell_idx, vert_cell_blk, coeff_int, p_vert_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_c, nblks_v, lacc, acc_async); } -// This is the binding for mo_interpolation_scalar::cells2verts_scalar_ri_lib (wp=dp, vp=dp) +// This is the binding for mo_interpolation_scalar::cells2verts_scalar_ri_lib +// (wp=dp, vp=dp) void cells2verts_scalar_ri_lib_dp( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async) -{ - cells2verts_scalar_ri_lib<double,double>( - p_cell_in, vert_cell_idx, vert_cell_blk, - coeff_int, p_vert_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_c, nblks_v, - lacc, acc_async); + const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const double *coeff_int, double *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async) { + cells2verts_scalar_ri_lib<double, double>( + p_cell_in, vert_cell_idx, vert_cell_blk, coeff_int, p_vert_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_c, nblks_v, lacc, acc_async); } -// This is the binding for mo_interpolation_scalar::cells2verts_scalar_ri_lib (wp=dp, vp=sp) +// This is the binding for mo_interpolation_scalar::cells2verts_scalar_ri_lib +// (wp=dp, vp=sp) void cells2verts_scalar_ri_lib_dp2sp( - const double* p_cell_in, const int* vert_cell_idx, const int* 
vert_cell_blk, - const double* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async) -{ - cells2verts_scalar_ri_lib<double,float>( - p_cell_in, vert_cell_idx, vert_cell_blk, - coeff_int, p_vert_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_c, nblks_v, - lacc, acc_async); + const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const double *coeff_int, float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async) { + cells2verts_scalar_ri_lib<double, float>( + p_cell_in, vert_cell_idx, vert_cell_blk, coeff_int, p_vert_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_c, nblks_v, lacc, acc_async); } -// This is the binding for mo_interpolation_scalar::cells2verts_scalar_ri_lib (wp=sp, vp=sp) +// This is the binding for mo_interpolation_scalar::cells2verts_scalar_ri_lib +// (wp=sp, vp=sp) void cells2verts_scalar_ri_lib_sp( - const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async) -{ - cells2verts_scalar_ri_lib<float,float>( - p_cell_in, vert_cell_idx, vert_cell_blk, - coeff_int, p_vert_out, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_c, nblks_v, - lacc, acc_async); + const float *p_cell_in, const int 
*vert_cell_idx, const int *vert_cell_blk, + const float *coeff_int, float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async) { + cells2verts_scalar_ri_lib<float, float>( + p_cell_in, vert_cell_idx, vert_cell_blk, coeff_int, p_vert_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_c, nblks_v, lacc, acc_async); } -// This is the binding for mo_interpolation_scalar::verts2cells_scalar_lib (wp=dp) +// This is the binding for mo_interpolation_scalar::verts2cells_scalar_lib +// (wp=dp) void verts2cells_scalar_lib_dp( - const double* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, - const double* coeff_int, double* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, - const int nblks_v, const bool lacc) -{ - verts2cells_scalar_lib<double>( - p_vert_in, cell_index_idx, cell_vertex_blk, - coeff_int, p_cell_out, nblks_c, npromz_c, - slev, elev, nproma, nlev, - nblks_v, lacc); + const double *p_vert_in, const int *cell_index_idx, + const int *cell_vertex_blk, const double *coeff_int, double *p_cell_out, + const int nblks_c, const int npromz_c, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_v, const bool lacc) { + verts2cells_scalar_lib<double>(p_vert_in, cell_index_idx, cell_vertex_blk, + coeff_int, p_cell_out, nblks_c, npromz_c, slev, + elev, nproma, nlev, nblks_v, lacc); } -// This is the binding for mo_interpolation_scalar::verts2cells_scalar_lib (wp=sp) +// This is the binding for mo_interpolation_scalar::verts2cells_scalar_lib +// (wp=sp) void verts2cells_scalar_lib_sp( - const float* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, - const float* coeff_int, float* p_cell_out, const int nblks_c, 
const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, - const int nblks_v, const bool lacc) -{ - verts2cells_scalar_lib<float>( - p_vert_in, cell_index_idx, cell_vertex_blk, - coeff_int, p_cell_out, nblks_c, npromz_c, - slev, elev, nproma, nlev, - nblks_v, lacc); + const float *p_vert_in, const int *cell_index_idx, + const int *cell_vertex_blk, const float *coeff_int, float *p_cell_out, + const int nblks_c, const int npromz_c, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_v, const bool lacc) { + verts2cells_scalar_lib<float>(p_vert_in, cell_index_idx, cell_vertex_blk, + coeff_int, p_cell_out, nblks_c, npromz_c, slev, + elev, nproma, nlev, nblks_v, lacc); } // This is the binding for mo_interpolation_scalar::cell_avg_lib (wp=dp) -void cell_avg_lib_dp( - const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const double* avg_coeff, double* avg_psi_c, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, - const bool lacc) -{ - cell_avg_lib<double>( - psi_c, cell_neighbor_idx, cell_neighbor_blk, - avg_coeff, avg_psi_c, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_c, - lacc); +void cell_avg_lib_dp(const double *psi_c, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const double *avg_coeff, + double *avg_psi_c, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_c, + const bool lacc) { + cell_avg_lib<double>(psi_c, cell_neighbor_idx, cell_neighbor_blk, avg_coeff, + avg_psi_c, i_startblk, i_endblk, i_startidx_in, + i_endidx_in, slev, elev, nproma, nlev, nblks_c, lacc); } // This is the binding for mo_interpolation_scalar::cell_avg_lib (wp=sp) -void cell_avg_lib_sp( - 
const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const float* avg_coeff, float* avg_psi_c, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, - const bool lacc) -{ - cell_avg_lib<float>( - psi_c, cell_neighbor_idx, cell_neighbor_blk, - avg_coeff, avg_psi_c, i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, nproma, - nlev, nblks_c, - lacc); +void cell_avg_lib_sp(const float *psi_c, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const float *avg_coeff, + float *avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const bool lacc) { + cell_avg_lib<float>(psi_c, cell_neighbor_idx, cell_neighbor_blk, avg_coeff, + avg_psi_c, i_startblk, i_endblk, i_startidx_in, + i_endidx_in, slev, elev, nproma, nlev, nblks_c, lacc); } - diff --git a/src/interpolation/interpolation_bindings.h b/src/interpolation/interpolation_bindings.h index 7c6b125..7cb873d 100644 --- a/src/interpolation/interpolation_bindings.h +++ b/src/interpolation/interpolation_bindings.h @@ -10,177 +10,179 @@ // --------------------------------------------------------------- #pragma once -extern "C"{ - - //mo_lib_interpolation_vector.F90 - void edges2cells_vector_lib_dp( - const double* p_vn_in, const double* p_vt_in, - const int* cell_edge_idx, const int* cell_edge_blk, - const double* e_bln_c_u, const double* e_bln_c_v, - double* p_u_out, double* p_v_out, - int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, - int slev, int elev, - int nproma, - int nlev, int nblks_e, int nblks_c); - - void edges2cells_vector_lib_sp( - const float* p_vn_in, const float* p_vt_in, - const int* cell_edge_idx, const int* cell_edge_blk, - const float* e_bln_c_u, const float* e_bln_c_v, - 
float* p_u_out, float* p_v_out, - int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, - int slev, int elev, - int nproma, - int nlev, int nblks_e, int nblks_c); - - //mo_lib_interpolation_scalar.F90 - void verts2edges_scalar_lib_dp(const double* p_vertex_in, - const int* edge_vertex_idx, const int* edge_vertex_blk, - const double* coeff_int, - double* p_edge_out, - const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, - const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc); - - void verts2edges_scalar_lib_sp(const float* p_vertex_in, - const int* edge_vertex_idx, const int* edge_vertex_blk, - const float* coeff_int, - float* p_edge_out, - const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, - const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc); - - void cells2edges_scalar_lib_dp(const double* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const double* coeff_int, double* p_edge_out, const int* i_startblk_in, - const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, - const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, - const bool l_limited_area, const bool lfill_latbc, const bool lacc); - - - void cells2edges_scalar_lib_sp(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const float* coeff_int, float* p_edge_out, const int* i_startblk_in, - const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, - const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, - const bool l_limited_area, const bool lfill_latbc, const bool lacc); - - void cells2edges_scalar_lib_sp2dp(const float* p_cell_in, const int* edge_cell_idx, const int* 
edge_cell_blk, - const double* coeff_int, double* p_edge_out, const int* i_startblk_in, - const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, - const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, - const bool l_limited_area, const bool lfill_latbc, const bool lacc); - - void edges2verts_scalar_lib_dp (const double* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, - const double* v_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, - const int elev, const int nproma, const int nlev, const int nblks_e, - const int nblks_v, const bool lacc); - - - void edges2verts_scalar_lib_sp (const float* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, - const float* v_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, - const int elev, const int nproma, const int nlev, const int nblks_e, - const int nblks_v, const bool lacc); - - void edges2cells_scalar_lib_dp( - const double* p_edge_in, const int* edge_idx, const int* edge_blk, - const double* coeff_int, double* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_e, const int nblks_c, - const bool lacc); - void edges2cells_scalar_lib_sp( - const float* p_edge_in, const int* edge_idx, const int* edge_blk, - const float* coeff_int, float* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_e, const int nblks_c, - const bool lacc); - - ///////////////////////////////////////////// - - void cells2verts_scalar_lib_dp( - const double* p_cell_in, const int* vert_cell_idx, const int* 
vert_cell_blk, - const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - void cells2verts_scalar_lib_dp2sp( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - void cells2verts_scalar_lib_sp( - const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - - ///////////////////////////////////////////// - - void cells2verts_scalar_ri_lib_dp( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - - void cells2verts_scalar_ri_lib_dp2sp( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const double* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - - void 
cells2verts_scalar_ri_lib_sp( - const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - - ///////////////////////////////////////////// - - void verts2cells_scalar_lib_dp( - const double* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, - const double* coeff_int, double* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, - const int nblks_v, const bool lacc); - - void verts2cells_scalar_lib_sp( - const float* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, - const float* coeff_int, float* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, - const int nblks_v, const bool lacc); - - ///////////////////////////////////////////// - - void cell_avg_lib_dp( - const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const double* avg_coeff, double* avg_psi_c, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, - const bool lacc); - void cell_avg_lib_sp( - const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const float* avg_coeff, float* avg_psi_c, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, - const bool lacc); - +extern "C" { + +// mo_lib_interpolation_vector.F90 +void edges2cells_vector_lib_dp(const double *p_vn_in, const double *p_vt_in, + const int *cell_edge_idx, + const int 
*cell_edge_blk, + const double *e_bln_c_u, const double *e_bln_c_v, + double *p_u_out, double *p_v_out, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int nlev, + int nblks_e, int nblks_c); + +void edges2cells_vector_lib_sp(const float *p_vn_in, const float *p_vt_in, + const int *cell_edge_idx, + const int *cell_edge_blk, const float *e_bln_c_u, + const float *e_bln_c_v, float *p_u_out, + float *p_v_out, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_e, + int nblks_c); + +// mo_lib_interpolation_scalar.F90 +void verts2edges_scalar_lib_dp( + const double *p_vertex_in, const int *edge_vertex_idx, + const int *edge_vertex_blk, const double *coeff_int, double *p_edge_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const int nlev, const int nblks_v, const int nblks_e, const bool lacc); + +void verts2edges_scalar_lib_sp( + const float *p_vertex_in, const int *edge_vertex_idx, + const int *edge_vertex_blk, const float *coeff_int, float *p_edge_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const int nlev, const int nblks_v, const int nblks_e, const bool lacc); + +void cells2edges_scalar_lib_dp( + const double *p_cell_in, const int *edge_cell_idx, const int *edge_cell_blk, + const double *coeff_int, double *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, const int *i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc); + +void cells2edges_scalar_lib_sp(const float *p_cell_in, const int *edge_cell_idx, + const int *edge_cell_blk, const float 
*coeff_int, + float *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, + const int *i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, + const int patch_id, const bool l_limited_area, + const bool lfill_latbc, const bool lacc); + +void cells2edges_scalar_lib_sp2dp( + const float *p_cell_in, const int *edge_cell_idx, const int *edge_cell_blk, + const double *coeff_int, double *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, const int *i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc); + +void edges2verts_scalar_lib_dp( + const double *p_edge_in, const int *vert_edge_idx, const int *vert_edge_blk, + const double *v_int, double *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_v, const bool lacc); + +void edges2verts_scalar_lib_sp(const float *p_edge_in, const int *vert_edge_idx, + const int *vert_edge_blk, const float *v_int, + float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_v, + const bool lacc); + +void edges2cells_scalar_lib_dp(const double *p_edge_in, const int *edge_idx, + const int *edge_blk, const double *coeff_int, + double *p_cell_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_c, + const bool lacc); +void edges2cells_scalar_lib_sp(const float *p_edge_in, const int *edge_idx, 
+ const int *edge_blk, const float *coeff_int, + float *p_cell_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_c, + const bool lacc); + +///////////////////////////////////////////// + +void cells2verts_scalar_lib_dp( + const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const double *coeff_int, double *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async); +void cells2verts_scalar_lib_dp2sp( + const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const float *coeff_int, float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async); +void cells2verts_scalar_lib_sp(const float *p_cell_in, const int *vert_cell_idx, + const int *vert_cell_blk, const float *coeff_int, + float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async); + +///////////////////////////////////////////// + +void cells2verts_scalar_ri_lib_dp( + const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const double *coeff_int, double *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async); + 
+void cells2verts_scalar_ri_lib_dp2sp( + const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const double *coeff_int, float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async); + +void cells2verts_scalar_ri_lib_sp( + const float *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const float *coeff_int, float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async); + +///////////////////////////////////////////// + +void verts2cells_scalar_lib_dp( + const double *p_vert_in, const int *cell_index_idx, + const int *cell_vertex_blk, const double *coeff_int, double *p_cell_out, + const int nblks_c, const int npromz_c, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_v, const bool lacc); + +void verts2cells_scalar_lib_sp( + const float *p_vert_in, const int *cell_index_idx, + const int *cell_vertex_blk, const float *coeff_int, float *p_cell_out, + const int nblks_c, const int npromz_c, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_v, const bool lacc); + +///////////////////////////////////////////// + +void cell_avg_lib_dp(const double *psi_c, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const double *avg_coeff, + double *avg_psi_c, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_c, + const bool lacc); +void cell_avg_lib_sp(const float *psi_c, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const float 
*avg_coeff, + float *avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const bool lacc); } diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index 386ce0c..9e4e6c5 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -9,10 +9,10 @@ // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- +#include "mo_lib_interpolation_scalar.hpp" +#include "mo_lib_loopindices.hpp" #include <Kokkos_Core.hpp> #include <iostream> -#include "mo_lib_loopindices.hpp" -#include "mo_lib_interpolation_scalar.hpp" //----------------------------------------------------------------------- // @@ -28,45 +28,52 @@ /// The coefficients are given by coeff_int. /// template <typename T> -void verts2edges_scalar_lib(const T* p_vertex_in, - const int* edge_vertex_idx, const int* edge_vertex_blk, - const T* coeff_int, - T* p_edge_out, - const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, - const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc){ - - // Wrap raw pointers in unmanaged Kokkos Views. 
- typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - - UnmanagedConstT3D p_vertex_in_view (p_vertex_in, nproma,nlev,nblks_v); - UnmanagedConstInt3D iidx_view (edge_vertex_idx,nproma,nblks_e, 4); - UnmanagedConstInt3D iblk_view (edge_vertex_blk, nproma,nblks_e, 4); - UnmanagedConstT3D coeff_int_view (coeff_int, nproma,2,nblks_e); - UnmanagedT3D p_edge_out_view (p_edge_out, nproma,nlev,nblks_e); - - for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { - - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, - jb, i_startblk, i_endblk, - i_startidx, i_endidx); +void verts2edges_scalar_lib(const T *p_vertex_in, const int *edge_vertex_idx, + const int *edge_vertex_blk, const T *coeff_int, + T *p_edge_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_v, const int nblks_e, + const bool lacc) { + + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D p_vertex_in_view(p_vertex_in, nproma, nlev, nblks_v); + UnmanagedConstInt3D iidx_view(edge_vertex_idx, nproma, nblks_e, 4); + UnmanagedConstInt3D iblk_view(edge_vertex_blk, nproma, nblks_e, 4); + UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 2, nblks_e); + UnmanagedT3D p_edge_out_view(p_edge_out, nproma, nlev, nblks_e); + + for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); - - Kokkos::parallel_for("verts2edges_scalar", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int je) { - - p_edge_out_view(je, jk, jb) = coeff_int_view(je, 0, jb) * - p_vertex_in_view(iidx_view(je, jb, 0), jk, iblk_view(je, jb, 0)) + - coeff_int_view(je, 1, jb)*p_vertex_in_view(iidx_view(je, jb, 1), jk, iblk_view(je, jb, 1)); - } - ); - Kokkos::fence(); - } + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "verts2edges_scalar", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int je) { + p_edge_out_view(je, jk, jb) = + coeff_int_view(je, 0, jb) * + p_vertex_in_view(iidx_view(je, jb, 0), jk, + iblk_view(je, jb, 0)) + + coeff_int_view(je, 1, jb) * + p_vertex_in_view(iidx_view(je, jb, 1), jk, + iblk_view(je, jb, 1)); + }); + Kokkos::fence(); + } } //------------------------------------------------------------------------ @@ -77,83 +84,95 @@ void verts2edges_scalar_lib(const T* p_vertex_in, /// velocity points. 
/// template <typename T, typename S> -void cells2edges_scalar_lib(const T* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const S* coeff_int, S* p_edge_out, const int* i_startblk_in, - const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, - const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, - const bool l_limited_area, const bool lfill_latbc, const bool lacc){ - - // Wrap raw pointers in unmanaged Kokkos Views. - typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<const S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstS3D; - typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - - UnmanagedConstT3D p_cell_in_view (p_cell_in, nproma, nlev, nblk_c); - UnmanagedConstInt3D iidx_view (edge_cell_idx, nproma, nblks_e, 2); - UnmanagedConstInt3D iblk_view (edge_cell_blk, nproma, nblks_e, 2); - UnmanagedConstS3D coeff_int_view (coeff_int, nproma, 2, nblks_e); - UnmanagedS3D p_edge_out_view (p_edge_out, nproma, nlev, nblks_e); - - //Fill outermost nest boundary - int i_startblk, i_endblk; - if ((l_limited_area || patch_id > 0) && (lfill_latbc)){ - i_startblk = i_startblk_in[0]; - i_endblk = i_endblk_in[0]; - - for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { - - int i_startidx, i_endidx; - get_indices_e_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, - i_startblk, i_endblk, i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); - - Kokkos::parallel_for("cells2edges_scalar", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int je) { - - if (iidx_view(je, jb, 0) >= 0 && iblk_view(je, jb, 0) >= 0){ - p_edge_out_view(je, jk, jb) = p_cell_in_view(iidx_view(je, jb, 0), jk, 
iblk_view(je, jb, 0)); - } - else if (iidx_view(je, jb, 1) >= 0 && iblk_view(je, jb, 1) >= 0){ - p_edge_out_view(je, jk, jb) = p_cell_in_view(iidx_view(je, jb, 1), jk, iblk_view(je, jb, 1)); - } - else{ - std::cerr << "mo_interpolation:cells2edges_scalar_lib: error in lateral boundary filling" << std::endl; - std::exit(EXIT_FAILURE); - } - } - ); - Kokkos::fence(); - } - } - else{ - //Process the remaining grid points for which a real interpolation is possible - i_startblk = i_startblk_in[1]; - i_endblk = i_endblk_in[1]; - - for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { - - int i_startidx, i_endidx; - get_indices_e_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, - i_startblk, i_endblk, i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); - - Kokkos::parallel_for("cells2edges_scalar", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int je) { - p_edge_out_view(je, jk, jb) = coeff_int_view(je, 0, jb) * - p_cell_in_view(iidx_view(je, jb, 0), jk, iblk_view(je, jb, 0)) + - coeff_int_view(je, 1, jb) * p_cell_in_view(iidx_view(je, jb, 1), jk, iblk_view(je, jb, 1)); - } - ); - Kokkos::fence(); - } +void cells2edges_scalar_lib(const T *p_cell_in, const int *edge_cell_idx, + const int *edge_cell_blk, const S *coeff_int, + S *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, + const int *i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, + const int patch_id, const bool l_limited_area, + const bool lfill_latbc, const bool lacc) { + + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const S ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstS3D; + typedef Kokkos::View<S ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedS3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblk_c); + UnmanagedConstInt3D iidx_view(edge_cell_idx, nproma, nblks_e, 2); + UnmanagedConstInt3D iblk_view(edge_cell_blk, nproma, nblks_e, 2); + UnmanagedConstS3D coeff_int_view(coeff_int, nproma, 2, nblks_e); + UnmanagedS3D p_edge_out_view(p_edge_out, nproma, nlev, nblks_e); + + // Fill outermost nest boundary + int i_startblk, i_endblk; + if ((l_limited_area || patch_id > 0) && (lfill_latbc)) { + i_startblk = i_startblk_in[0]; + i_endblk = i_endblk_in[0]; + + for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { + + int i_startidx, i_endidx; + get_indices_e_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "cells2edges_scalar", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int je) { + if (iidx_view(je, jb, 0) >= 0 && iblk_view(je, jb, 0) >= 0) { + p_edge_out_view(je, jk, jb) = p_cell_in_view( + iidx_view(je, jb, 0), jk, iblk_view(je, jb, 0)); + } else if (iidx_view(je, jb, 1) >= 0 && iblk_view(je, jb, 1) >= 0) { + p_edge_out_view(je, jk, jb) = p_cell_in_view( + iidx_view(je, jb, 1), jk, iblk_view(je, jb, 1)); + } else { + std::cerr << "mo_interpolation:cells2edges_scalar_lib: error in " + "lateral boundary filling" + << std::endl; + std::exit(EXIT_FAILURE); + } + }); + Kokkos::fence(); } + } else { + // Process the remaining grid points for which a real interpolation is + // possible + i_startblk = i_startblk_in[1]; + i_endblk 
= i_endblk_in[1]; + + for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { + + int i_startidx, i_endidx; + get_indices_e_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + Kokkos::parallel_for( + "cells2edges_scalar", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int je) { + p_edge_out_view(je, jk, jb) = + coeff_int_view(je, 0, jb) * + p_cell_in_view(iidx_view(je, jb, 0), jk, + iblk_view(je, jb, 0)) + + coeff_int_view(je, 1, jb) * + p_cell_in_view(iidx_view(je, jb, 1), jk, + iblk_view(je, jb, 1)); + }); + Kokkos::fence(); + } + } } //------------------------------------------------------------------------ @@ -164,46 +183,58 @@ void cells2edges_scalar_lib(const T* p_cell_in, const int* edge_cell_idx, const /// centers of dual faces. /// template <typename T> -void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, - const T* v_int, T* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, - const int elev, const int nproma, const int nlev, const int nblks_e, - const int nblks_v, const bool lacc){ - - // Wrap raw pointers in unmanaged Kokkos Views. 
- typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - - UnmanagedConstT3D p_edge_in_view (p_edge_in, nproma, nlev, nblks_e); - UnmanagedConstInt3D iidx_view (vert_edge_idx, nproma, nblks_v, 6); - UnmanagedConstInt3D iblk_view (vert_edge_blk, nproma,nblks_v, 6); - UnmanagedConstT3D v_int_view (v_int, nproma, 6, nblks_v); - UnmanagedT3D p_vert_out_view (p_vert_out, nproma, nlev, nblks_v); +void edges2verts_scalar_lib(const T *p_edge_in, const int *vert_edge_idx, + const int *vert_edge_blk, const T *v_int, + T *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_v, + const bool lacc) { + + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D p_edge_in_view(p_edge_in, nproma, nlev, nblks_e); + UnmanagedConstInt3D iidx_view(vert_edge_idx, nproma, nblks_v, 6); + UnmanagedConstInt3D iblk_view(vert_edge_blk, nproma, nblks_v, 6); + UnmanagedConstT3D v_int_view(v_int, nproma, 6, nblks_v); + UnmanagedT3D p_vert_out_view(p_vert_out, nproma, nlev, nblks_v); + + for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { - for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); - int i_startidx, i_endidx; - get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, - jb, i_startblk, i_endblk, - i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); - - Kokkos::parallel_for("edges2verts_scalar", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jv) { - p_vert_out_view(jv, jk, jb) = v_int_view(jv, 0, jb)*p_edge_in_view(iidx_view(jv, jb, 0), jk, iblk_view(jv, jb, 0)) - + v_int_view(jv, 1, jb)*p_edge_in_view(iidx_view(jv, jb, 1), jk, iblk_view(jv, jb, 1)) - + v_int_view(jv, 2, jb)*p_edge_in_view(iidx_view(jv, jb, 2), jk, iblk_view(jv, jb, 2)) - + v_int_view(jv, 3, jb)*p_edge_in_view(iidx_view(jv, jb, 3), jk, iblk_view(jv, jb, 3)) - + v_int_view(jv, 4, jb)*p_edge_in_view(iidx_view(jv, jb, 4), jk, iblk_view(jv, jb, 4)) - + v_int_view(jv, 5, jb)*p_edge_in_view(iidx_view(jv, jb, 5), jk, iblk_view(jv, jb, 5)); - } - ); - Kokkos::fence(); - } + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + Kokkos::parallel_for( + "edges2verts_scalar", innerPolicy, + 
KOKKOS_LAMBDA(const int jk, const int jv) { + p_vert_out_view(jv, jk, jb) = + v_int_view(jv, 0, jb) * p_edge_in_view(iidx_view(jv, jb, 0), jk, + iblk_view(jv, jb, 0)) + + v_int_view(jv, 1, jb) * p_edge_in_view(iidx_view(jv, jb, 1), jk, + iblk_view(jv, jb, 1)) + + v_int_view(jv, 2, jb) * p_edge_in_view(iidx_view(jv, jb, 2), jk, + iblk_view(jv, jb, 2)) + + v_int_view(jv, 3, jb) * p_edge_in_view(iidx_view(jv, jb, 3), jk, + iblk_view(jv, jb, 3)) + + v_int_view(jv, 4, jb) * p_edge_in_view(iidx_view(jv, jb, 4), jk, + iblk_view(jv, jb, 4)) + + v_int_view(jv, 5, jb) * p_edge_in_view(iidx_view(jv, jb, 5), jk, + iblk_view(jv, jb, 5)); + }); + Kokkos::fence(); + } } //------------------------------------------------------------------------ @@ -214,54 +245,64 @@ void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const /// cell centers via given interpolation weights /// template <typename T> -void edges2cells_scalar_lib( - const T* p_edge_in, const int* edge_idx, const int* edge_blk, - const T* coeff_int, T* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_e, const int nblks_c, - const bool lacc) -{ - // Wrap raw pointers in unmanaged Kokkos Views. 
- typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - - // edge based scalar input field, dim: (nproma,nlev,nblks_e) - UnmanagedConstT3D p_edge_in_view(p_edge_in, nproma, nlev, nblks_e); - - // line indices of edges of triangles, dim: (nproma,nblks_c, 3) - UnmanagedConstInt3D iidx_view(edge_idx, nproma, nblks_c, 3); // edge_idx_view - - // block indices of edges of triangles, dim: (nproma,nblks_c, 3) - UnmanagedConstInt3D iblk_view(edge_blk, nproma, nblks_c, 3); // edge_blk_view - - // coefficients for (area weighted) interpolation, dim: (nproma,3-cell_type,nblks_c) - UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 3, nblks_c); - - // cell based scalar output field, dim: (nproma,nlev,nblks_c) - UnmanagedT3D p_cell_out_view(p_cell_out, nproma, nlev, nblks_c); - - int i_startidx, i_endidx; - - for (int jb = i_startblk; jb < i_endblk + 1; ++jb){ - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, - i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); - - Kokkos::parallel_for("edges2cells_scalar_lib_inner", innerPolicy, +void edges2cells_scalar_lib(const T *p_edge_in, const int *edge_idx, + const int *edge_blk, const T *coeff_int, + T *p_cell_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_c, + const bool lacc) { + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + // edge based scalar input field, dim: (nproma,nlev,nblks_e) + UnmanagedConstT3D p_edge_in_view(p_edge_in, nproma, nlev, nblks_e); + + // line indices of edges of triangles, dim: (nproma,nblks_c, 3) + UnmanagedConstInt3D iidx_view(edge_idx, nproma, nblks_c, 3); // edge_idx_view + + // block indices of edges of triangles, dim: (nproma,nblks_c, 3) + UnmanagedConstInt3D iblk_view(edge_blk, nproma, nblks_c, 3); // edge_blk_view + + // coefficients for (area weighted) interpolation, dim: + // (nproma,3-cell_type,nblks_c) + UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 3, nblks_c); + + // cell based scalar output field, dim: (nproma,nlev,nblks_c) + UnmanagedT3D p_cell_out_view(p_cell_out, nproma, nlev, nblks_c); + + int i_startidx, i_endidx; + + for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "edges2cells_scalar_lib_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - p_cell_out_view(jc, jk, jb) = - coeff_int_view(jc, 0, jb)*p_edge_in_view(iidx_view(jc, jb, 0), jk, iblk_view(jc, jb, 0)) + - coeff_int_view(jc, 1, jb)*p_edge_in_view(iidx_view(jc, jb, 1), jk, iblk_view(jc, jb, 1)) + - coeff_int_view(jc, 2, jb)*p_edge_in_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2)); - } - ); - Kokkos::fence(); - } - + p_cell_out_view(jc, jk, jb) = + coeff_int_view(jc, 0, jb) * p_edge_in_view(iidx_view(jc, jb, 0), + jk, + iblk_view(jc, jb, 0)) + + coeff_int_view(jc, 1, jb) * p_edge_in_view(iidx_view(jc, jb, 1), + jk, + 
iblk_view(jc, jb, 1)) + + coeff_int_view(jc, 2, jb) * p_edge_in_view(iidx_view(jc, jb, 2), + jk, + iblk_view(jc, jb, 2)); + }); + Kokkos::fence(); + } } //------------------------------------------------------------------------ @@ -269,60 +310,77 @@ void edges2cells_scalar_lib( /// Computes average of scalar fields from centers of cells to vertices. /// template <typename T, typename S> -void cells2verts_scalar_lib( - const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const S* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async) -{ - // Wrap raw pointers in unmanaged Kokkos Views. - typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<const S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstS3D; - typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - - // cell based scalar input field, dim: (nproma,nlev,nblks_c) - UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c); - - // line indices of cells around each vertex, dim: (nproma,nblks_v, 6) - UnmanagedConstInt3D iidx_view(vert_cell_idx, nproma, nblks_v, 6); // vert_cell_idx_view - - // block indices of cells around each vertex, dim: (nproma,nblks_v, 6) - UnmanagedConstInt3D iblk_view(vert_cell_blk, nproma, nblks_v, 6); // vert_cell_blk_view - - // coefficients for interpolation, dim: (nproma,9-cell_type,nblks_v) - UnmanagedConstS3D coeff_int_view(coeff_int, nproma, 6, nblks_v); - - // vertex based scalar output field, dim: (nproma,nlev,nblks_c) - UnmanagedS3D p_vert_out_view(p_vert_out, nproma, nlev, nblks_c); - - int i_startidx, i_endidx; - - for (int jb 
= i_startblk; jb < i_endblk + 1; ++jb){ - - get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, - i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); - - Kokkos::parallel_for("cells2verts_scalar_lib", innerPolicy, +void cells2verts_scalar_lib(const T *p_cell_in, const int *vert_cell_idx, + const int *vert_cell_blk, const S *coeff_int, + S *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const S ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstS3D; + typedef Kokkos::View<S ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedS3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + // cell based scalar input field, dim: (nproma,nlev,nblks_c) + UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c); + + // line indices of cells around each vertex, dim: (nproma,nblks_v, 6) + UnmanagedConstInt3D iidx_view(vert_cell_idx, nproma, nblks_v, + 6); // vert_cell_idx_view + + // block indices of cells around each vertex, dim: (nproma,nblks_v, 6) + UnmanagedConstInt3D iblk_view(vert_cell_blk, nproma, nblks_v, + 6); // vert_cell_blk_view + + // coefficients for interpolation, dim: (nproma,9-cell_type,nblks_v) + UnmanagedConstS3D coeff_int_view(coeff_int, nproma, 6, nblks_v); + + // vertex based scalar output field, dim: (nproma,nlev,nblks_c) + UnmanagedS3D p_vert_out_view(p_vert_out, nproma, nlev, nblks_c); + + int i_startidx, i_endidx; + + for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { + + 
get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "cells2verts_scalar_lib", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jv) { - p_vert_out_view(jv, jk, jb) = - coeff_int_view(jv, 0, jb)*p_cell_in_view(iidx_view(jv, jb, 0), jk, iblk_view(jv, jb, 0)) + - coeff_int_view(jv, 1, jb)*p_cell_in_view(iidx_view(jv, jb, 1), jk, iblk_view(jv, jb, 1)) + - coeff_int_view(jv, 2, jb)*p_cell_in_view(iidx_view(jv, jb, 2), jk, iblk_view(jv, jb, 2)) + - coeff_int_view(jv, 3, jb)*p_cell_in_view(iidx_view(jv, jb, 3), jk, iblk_view(jv, jb, 3)) + - coeff_int_view(jv, 4, jb)*p_cell_in_view(iidx_view(jv, jb, 4), jk, iblk_view(jv, jb, 4)) + - coeff_int_view(jv, 5, jb)*p_cell_in_view(iidx_view(jv, jb, 5), jk, iblk_view(jv, jb, 5)); - - } - ); - Kokkos::fence(); - } + coeff_int_view(jv, 0, jb) * p_cell_in_view(iidx_view(jv, jb, 0), + jk, + iblk_view(jv, jb, 0)) + + coeff_int_view(jv, 1, jb) * p_cell_in_view(iidx_view(jv, jb, 1), + jk, + iblk_view(jv, jb, 1)) + + coeff_int_view(jv, 2, jb) * p_cell_in_view(iidx_view(jv, jb, 2), + jk, + iblk_view(jv, jb, 2)) + + coeff_int_view(jv, 3, jb) * p_cell_in_view(iidx_view(jv, jb, 3), + jk, + iblk_view(jv, jb, 3)) + + coeff_int_view(jv, 4, jb) * p_cell_in_view(iidx_view(jv, jb, 4), + jk, + iblk_view(jv, jb, 4)) + + coeff_int_view(jv, 5, jb) * p_cell_in_view(iidx_view(jv, jb, 5), + jk, + iblk_view(jv, jb, 5)); + }); + Kokkos::fence(); + } } //------------------------------------------------------------------------- @@ -331,49 +389,56 @@ void cells2verts_scalar_lib( /// assumes reversed index order of the output field in loop exchange mode /// template <typename T, typename S> -void cells2verts_scalar_ri_lib( - const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const T* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, - 
const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async) -{ - // Wrap raw pointers in unmanaged Kokkos Views. - typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - - // cell based scalar input field, dim: (nproma,nlev,nblks_c) - UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c); - - // line indices of cells around each vertex, dim: (nproma,nblks_v, 6) - UnmanagedConstInt3D iidx_view(vert_cell_idx, nproma, nblks_v, 6); // vert_cell_idx_view - - // block indices of cells around each vertex, dim: (nproma,nblks_v, 6) - UnmanagedConstInt3D iblk_view(vert_cell_blk, nproma, nblks_v, 6); // vert_cell_blk_view - - // coefficients for interpolation, dim: (nproma,9-cell_type,nblks_v) - UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 6, nblks_v); - - // vertex based scalar output field, dim: (nproma,nlev,nblks_c) +void cells2verts_scalar_ri_lib(const T *p_cell_in, const int *vert_cell_idx, + const int *vert_cell_blk, const T *coeff_int, + S *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, + const bool lacc, const bool acc_async) { + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<S ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedS3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + // cell based scalar input field, dim: (nproma,nlev,nblks_c) + UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c); + + // line indices of cells around each vertex, dim: (nproma,nblks_v, 6) + UnmanagedConstInt3D iidx_view(vert_cell_idx, nproma, nblks_v, + 6); // vert_cell_idx_view + + // block indices of cells around each vertex, dim: (nproma,nblks_v, 6) + UnmanagedConstInt3D iblk_view(vert_cell_blk, nproma, nblks_v, + 6); // vert_cell_blk_view + + // coefficients for interpolation, dim: (nproma,9-cell_type,nblks_v) + UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 6, nblks_v); + + // vertex based scalar output field, dim: (nproma,nlev,nblks_c) #ifdef __LOOP_EXCHANGE - UnmanagedS3D p_vert_out_view(p_vert_out, nproma, nlev, nblks_c); + UnmanagedS3D p_vert_out_view(p_vert_out, nproma, nlev, nblks_c); #else - UnmanagedS3D p_vert_out_view(p_vert_out, nlev, nproma, nblks_c); + UnmanagedS3D p_vert_out_view(p_vert_out, nlev, nproma, nblks_c); #endif - int i_startidx, i_endidx; + int i_startidx, i_endidx; - for (int jb = i_startblk; jb < i_endblk + 1; ++jb){ + for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { - get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, - i_startidx, i_endidx); + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); - Kokkos::parallel_for("cells2verts_scalar_ri_lib", innerPolicy, + Kokkos::parallel_for( + "cells2verts_scalar_ri_lib", innerPolicy, 
KOKKOS_LAMBDA(const int jk, const int jv) { #ifdef __LOOP_EXCHANGE @@ -381,18 +446,27 @@ void cells2verts_scalar_ri_lib( #else p_vert_out_view(jk, jv, jb) = #endif - coeff_int_view(jv, 0, jb)*p_cell_in_view(iidx_view(jv, jb, 0), jk, iblk_view(jv, jb, 0)) + - coeff_int_view(jv, 1, jb)*p_cell_in_view(iidx_view(jv, jb, 1), jk, iblk_view(jv, jb, 1)) + - coeff_int_view(jv, 2, jb)*p_cell_in_view(iidx_view(jv, jb, 2), jk, iblk_view(jv, jb, 2)) + - coeff_int_view(jv, 3, jb)*p_cell_in_view(iidx_view(jv, jb, 3), jk, iblk_view(jv, jb, 3)) + - coeff_int_view(jv, 4, jb)*p_cell_in_view(iidx_view(jv, jb, 4), jk, iblk_view(jv, jb, 4)) + - coeff_int_view(jv, 5, jb)*p_cell_in_view(iidx_view(jv, jb, 5), jk, iblk_view(jv, jb, 5)); - - } - ); - Kokkos::fence(); - } - + coeff_int_view(jv, 0, jb) * p_cell_in_view(iidx_view(jv, jb, 0), + jk, + iblk_view(jv, jb, 0)) + + coeff_int_view(jv, 1, jb) * p_cell_in_view(iidx_view(jv, jb, 1), + jk, + iblk_view(jv, jb, 1)) + + coeff_int_view(jv, 2, jb) * p_cell_in_view(iidx_view(jv, jb, 2), + jk, + iblk_view(jv, jb, 2)) + + coeff_int_view(jv, 3, jb) * p_cell_in_view(iidx_view(jv, jb, 3), + jk, + iblk_view(jv, jb, 3)) + + coeff_int_view(jv, 4, jb) * p_cell_in_view(iidx_view(jv, jb, 4), + jk, + iblk_view(jv, jb, 4)) + + coeff_int_view(jv, 5, jb) * p_cell_in_view(iidx_view(jv, jb, 5), + jk, + iblk_view(jv, jb, 5)); + }); + Kokkos::fence(); + } } //------------------------------------------------------------------------- @@ -400,56 +474,68 @@ void cells2verts_scalar_ri_lib( /// Computes average of scalar fields from vertices to centers of cells. /// template <typename T> -void verts2cells_scalar_lib( - const T* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, - const T* coeff_int, T* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, - const int nblks_v, const bool lacc) -{ - // Wrap raw pointers in unmanaged Kokkos Views. 
- typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - - // cell based scalar input field, dim: (nproma,nlev,nblks_v) - UnmanagedConstT3D p_vert_in_view(p_vert_in, nproma, nlev, nblks_v); - - // line indices of vertices of triangles, dim: (nproma,nblks_c, 3) - UnmanagedConstInt3D iidx_view(cell_index_idx, nproma, nblks_c, 3); // cell_vertex_idx - - // block indices of vertices of triangles, dim: (nproma,nblks_c, 3) - UnmanagedConstInt3D iblk_view(cell_vertex_blk, nproma, nblks_c, 3); // cell_vertex_blk - - // coefficients for interpolation, dim: (nproma, 3, nblks_c) - UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 3, nblks_c); - - // vertex based scalar output field, dim: (nproma,nlev,nblks_c) - UnmanagedT3D p_cell_out_view(p_cell_out, nproma, nlev, nblks_c); - - for (int jb = 0; jb<nblks_c; ++jb){ +void verts2cells_scalar_lib(const T *p_vert_in, const int *cell_index_idx, + const int *cell_vertex_blk, const T *coeff_int, + T *p_cell_out, const int nblks_c, + const int npromz_c, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_v, + const bool lacc) { + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + // cell based scalar input field, dim: (nproma,nlev,nblks_v) + UnmanagedConstT3D p_vert_in_view(p_vert_in, nproma, nlev, nblks_v); + + // line indices of vertices of triangles, dim: (nproma,nblks_c, 3) + UnmanagedConstInt3D iidx_view(cell_index_idx, nproma, nblks_c, + 3); // cell_vertex_idx + + // block indices of vertices of triangles, dim: (nproma,nblks_c, 3) + UnmanagedConstInt3D iblk_view(cell_vertex_blk, nproma, nblks_c, + 3); // cell_vertex_blk + + // coefficients for interpolation, dim: (nproma, 3, nblks_c) + UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 3, nblks_c); + + // vertex based scalar output field, dim: (nproma,nlev,nblks_c) + UnmanagedT3D p_cell_out_view(p_cell_out, nproma, nlev, nblks_c); + + for (int jb = 0; jb < nblks_c; ++jb) { + + int nlen; + if (jb != nblks_c) { + nlen = nproma; + } else { + nlen = npromz_c; + } - int nlen; - if (jb != nblks_c){ nlen = nproma; } - else { nlen = npromz_c; } + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, 0}, + {elev + 1, nlen}); - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, 0}, {elev + 1, nlen}); - - Kokkos::parallel_for("cell_avg_lib_inner", innerPolicy, + Kokkos::parallel_for( + "cell_avg_lib_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - p_cell_out_view(jc, jk, jb) = - coeff_int_view(jc, 0, jb)*p_vert_in_view(iidx_view(jc, jb, 0), jk, iblk_view(jc, jb, 0)) + - coeff_int_view(jc, 1, jb)*p_vert_in_view(iidx_view(jc, jb, 1), jk, iblk_view(jc, jb, 1)) + - coeff_int_view(jc, 2, jb)*p_vert_in_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2)); - - } - ); - Kokkos::fence(); - } + coeff_int_view(jc, 0, jb) * p_vert_in_view(iidx_view(jc, jb, 0), + jk, + 
iblk_view(jc, jb, 0)) + + coeff_int_view(jc, 1, jb) * p_vert_in_view(iidx_view(jc, jb, 1), + jk, + iblk_view(jc, jb, 1)) + + coeff_int_view(jc, 2, jb) * p_vert_in_view(iidx_view(jc, jb, 2), + jk, + iblk_view(jc, jb, 2)); + }); + Kokkos::fence(); + } } - //------------------------------------------------------------------------- //> /// Computes the average of a cell-based variable. @@ -462,52 +548,60 @@ void verts2cells_scalar_lib( /// output: lives on centers of triangles /// template <typename T> -void cell_avg_lib( - const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const T* avg_coeff, T* avg_psi_c, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const bool lacc) -{ - // Wrap raw pointers in unmanaged Kokkos Views. - typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - - // cell based variable before averaging, dim: (nproma,nlev,nblks_c) - UnmanagedConstT3D psi_c_view(psi_c, nproma, nlev, nblks_c); - // line indices of triangles next to each cell, dim: (nproma,nblks_c, 3) - UnmanagedConstInt3D iidx_view(cell_neighbor_idx, nproma, nblks_c, 3); // cell_neighbour_idx - // block indices of triangles next to each cell, dim: (nproma,nblks_c, 3) - UnmanagedConstInt3D iblk_view(cell_neighbor_blk, nproma, nblks_c, 3); // cell_neighbour_blk - // averaging coefficients, dim: (nproma,nlev,nblks_c) - UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma, nlev, nblks_c); - - // cell based variable after averaging, dim: (nproma,nlev,nblks_c) - UnmanagedT3D avg_psi_c_view(avg_psi_c, nproma, nlev, nblks_c); - - int i_startidx, i_endidx; - - for (int jb = i_startblk; jb<i_endblk + 1; ++jb){ - 
get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, - i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); - - Kokkos::parallel_for("cell_avg_lib_inner", innerPolicy, +void cell_avg_lib(const T *psi_c, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const T *avg_coeff, + T *avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const bool lacc) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + // cell based variable before averaging, dim: (nproma,nlev,nblks_c) + UnmanagedConstT3D psi_c_view(psi_c, nproma, nlev, nblks_c); + // line indices of triangles next to each cell, dim: (nproma,nblks_c, 3) + UnmanagedConstInt3D iidx_view(cell_neighbor_idx, nproma, nblks_c, + 3); // cell_neighbour_idx + // block indices of triangles next to each cell, dim: (nproma,nblks_c, 3) + UnmanagedConstInt3D iblk_view(cell_neighbor_blk, nproma, nblks_c, + 3); // cell_neighbour_blk + // averaging coefficients, dim: (nproma,nlev,nblks_c) + UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma, nlev, nblks_c); + + // cell based variable after averaging, dim: (nproma,nlev,nblks_c) + UnmanagedT3D avg_psi_c_view(avg_psi_c, nproma, nlev, nblks_c); + + int i_startidx, i_endidx; + + for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + 
Kokkos::parallel_for( + "cell_avg_lib_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - // calculate the weighted average - - avg_psi_c_view(jc, jk, jb) = - psi_c_view(jc, jk, jb)*avg_coeff_view(jc, 0, jb) + - psi_c_view(iidx_view(jc, jb, 0), jk, iblk_view(jc, jb, 0))*avg_coeff_view(jc, 1, jb) + - psi_c_view(iidx_view(jc, jb, 1), jk, iblk_view(jc, jb, 1))*avg_coeff_view(jc, 2, jb) + - psi_c_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2))*avg_coeff_view(jc, 3, jb); - } - ); - Kokkos::fence(); - } + // calculate the weighted average + + avg_psi_c_view(jc, jk, jb) = + psi_c_view(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + + psi_c_view(iidx_view(jc, jb, 0), jk, iblk_view(jc, jb, 0)) * + avg_coeff_view(jc, 1, jb) + + psi_c_view(iidx_view(jc, jb, 1), jk, iblk_view(jc, jb, 1)) * + avg_coeff_view(jc, 2, jb) + + psi_c_view(iidx_view(jc, jb, 2), jk, iblk_view(jc, jb, 2)) * + avg_coeff_view(jc, 3, jb); + }); + Kokkos::fence(); + } } //----------------------------------------------------------------------- @@ -516,169 +610,144 @@ void cell_avg_lib( // //----------------------------------------------------------------------- -template -void verts2edges_scalar_lib<double>(const double* p_vertex_in, - const int* edge_vertex_idx, const int* edge_vertex_blk, - const double* coeff_int, - double* p_edge_out, - const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, - const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc); - -template -void verts2edges_scalar_lib<float>(const float* p_vertex_in, - const int* edge_vertex_idx, const int* edge_vertex_blk, - const float* coeff_int, - float* p_edge_out, - const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, - const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc); - -template -void cells2edges_scalar_lib<double, 
double>(const double* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const double* coeff_int, double* p_edge_out, const int* i_startblk_in, - const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, - const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, - const bool l_limited_area, const bool lfill_latbc, const bool lacc); - -template -void cells2edges_scalar_lib<float, float>(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const float* coeff_int, float* p_edge_out, const int* i_startblk_in, - const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, - const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, - const bool l_limited_area, const bool lfill_latbc, const bool lacc); - -template // sp2dp -void cells2edges_scalar_lib<float, double>(const float* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const double* coeff_int, double* p_edge_out, const int* i_startblk_in, - const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, - const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, - const bool l_limited_area, const bool lfill_latbc, const bool lacc); - - -template -void edges2verts_scalar_lib<double> (const double* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, - const double* v_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, - const int elev, const int nproma, const int nlev, const int nblks_e, - const int nblks_v, const bool lacc); - -template -void edges2verts_scalar_lib <float> (const float* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, - const float* v_int, float* p_vert_out, const int i_startblk, const int i_endblk, - 
const int i_startidx_in, const int i_endidx_in, const int slev, - const int elev, const int nproma, const int nlev, const int nblks_e, - const int nblks_v, const bool lacc); - - -template -void edges2cells_scalar_lib<double>( - const double* p_edge_in, const int* edge_idx, const int* edge_blk, - const double* coeff_int, double* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_e, const int nblks_c, - const bool lacc); - -template -void edges2cells_scalar_lib<float>( - const float* p_edge_in, const int* edge_idx, const int* edge_blk, - const float* coeff_int, float* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_e, const int nblks_c, - const bool lacc); - - -template -void cells2verts_scalar_lib<double,double>( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - -template -void cells2verts_scalar_lib<float,double>( - const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - -template -void cells2verts_scalar_lib<float,float>( - const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const float* coeff_int, float* p_vert_out, const int i_startblk, const 
int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - - -template -void cells2verts_scalar_ri_lib<double,double>( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const double* coeff_int, double* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - -template -void cells2verts_scalar_ri_lib<double,float>( - const double* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const double* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - -template -void cells2verts_scalar_ri_lib<float,float>( - const float* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const float* coeff_int, float* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, - const bool lacc, const bool acc_async); - - - -template -void verts2cells_scalar_lib<double>( - const double* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, - const double* coeff_int, double* p_cell_out, const int nblks_c, const int npromz_c, +template void verts2edges_scalar_lib<double>( + const double *p_vertex_in, const int *edge_vertex_idx, + const int *edge_vertex_blk, const double *coeff_int, double *p_edge_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int 
slev, const int elev, const int nproma, + const int nlev, const int nblks_v, const int nblks_e, const bool lacc); + +template void verts2edges_scalar_lib<float>( + const float *p_vertex_in, const int *edge_vertex_idx, + const int *edge_vertex_blk, const float *coeff_int, float *p_edge_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const int nlev, const int nblks_v, const int nblks_e, const bool lacc); + +template void cells2edges_scalar_lib<double, double>( + const double *p_cell_in, const int *edge_cell_idx, const int *edge_cell_blk, + const double *coeff_int, double *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, const int *i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc); + +template void cells2edges_scalar_lib<float, float>( + const float *p_cell_in, const int *edge_cell_idx, const int *edge_cell_blk, + const float *coeff_int, float *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, const int *i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc); + +// sp2dp +template void cells2edges_scalar_lib<float, double>( + const float *p_cell_in, const int *edge_cell_idx, const int *edge_cell_blk, + const double *coeff_int, double *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, const int *i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, const int patch_id, + const bool l_limited_area, const bool lfill_latbc, const bool lacc); + +template void 
edges2verts_scalar_lib<double>( + const double *p_edge_in, const int *vert_edge_idx, const int *vert_edge_blk, + const double *v_int, double *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_v, const bool lacc); + +template void edges2verts_scalar_lib<float>( + const float *p_edge_in, const int *vert_edge_idx, const int *vert_edge_blk, + const float *v_int, float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_v, const bool lacc); + +template void edges2cells_scalar_lib<double>( + const double *p_edge_in, const int *edge_idx, const int *edge_blk, + const double *coeff_int, double *p_cell_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_c, const bool lacc); + +template void edges2cells_scalar_lib<float>( + const float *p_edge_in, const int *edge_idx, const int *edge_blk, + const float *coeff_int, float *p_cell_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_c, const bool lacc); + +template void cells2verts_scalar_lib<double, double>( + const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const double *coeff_int, double *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async); + +template void cells2verts_scalar_lib<float, double>( 
+ const float *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const double *coeff_int, double *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async); + +template void cells2verts_scalar_lib<float, float>( + const float *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const float *coeff_int, float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async); + +template void cells2verts_scalar_ri_lib<double, double>( + const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const double *coeff_int, double *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async); + +template void cells2verts_scalar_ri_lib<double, float>( + const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk, + const double *coeff_int, float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, - const int nblks_v, const bool lacc); + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async); -template -void verts2cells_scalar_lib<float>( - const float* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, - const float* coeff_int, float* p_cell_out, const int nblks_c, const int npromz_c, +template void cells2verts_scalar_ri_lib<float, float>( + const float *p_cell_in, const int *vert_cell_idx, 
const int *vert_cell_blk, + const float *coeff_int, float *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, - const int nblks_v, const bool lacc); - - -template -void cell_avg_lib<double>( - const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const double* avg_coeff, double* avg_psi_c, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, - const bool lacc); - -template -void cell_avg_lib<float>( - const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const float* avg_coeff, float* avg_psi_c, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, - const bool lacc); + const int nblks_c, const int nblks_v, const bool lacc, + const bool acc_async); + +template void verts2cells_scalar_lib<double>( + const double *p_vert_in, const int *cell_index_idx, + const int *cell_vertex_blk, const double *coeff_int, double *p_cell_out, + const int nblks_c, const int npromz_c, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_v, const bool lacc); + +template void verts2cells_scalar_lib<float>( + const float *p_vert_in, const int *cell_index_idx, + const int *cell_vertex_blk, const float *coeff_int, float *p_cell_out, + const int nblks_c, const int npromz_c, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_v, const bool lacc); + +template void cell_avg_lib<double>( + const double *psi_c, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const double *avg_coeff, double *avg_psi_c, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const 
int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const bool lacc); + +template void +cell_avg_lib<float>(const float *psi_c, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const float *avg_coeff, + float *avg_psi_c, const int i_startblk, const int i_endblk, + const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, + const int nlev, const int nblks_c, const bool lacc); diff --git a/src/interpolation/mo_lib_interpolation_scalar.hpp b/src/interpolation/mo_lib_interpolation_scalar.hpp index 85c8fc5..8c8d2de 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.hpp +++ b/src/interpolation/mo_lib_interpolation_scalar.hpp @@ -12,63 +12,79 @@ #pragma once template <typename T> -void verts2edges_scalar_lib(const T* p_vertex_in, - const int* edge_vertex_idx, const int* edge_vertex_blk, - const T* coeff_int, - T* p_edge_out, - const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, - const int nproma, const int nlev, const int nblks_v, const int nblks_e, const bool lacc); +void verts2edges_scalar_lib(const T *p_vertex_in, const int *edge_vertex_idx, + const int *edge_vertex_blk, const T *coeff_int, + T *p_edge_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_v, const int nblks_e, + const bool lacc); ; template <typename T, typename S> -void cells2edges_scalar_lib(const T* p_cell_in, const int* edge_cell_idx, const int* edge_cell_blk, - const S* coeff_int, S* p_edge_out, const int* i_startblk_in, - const int* i_endblk_in, const int* i_startidx_in, const int* i_endidx_in, - const int slev, const int elev, const int nproma, const int nlev, - const int nblk_c, const int nblks_e, const int patch_id, - const bool l_limited_area, const bool lfill_latbc, const bool lacc); +void 
cells2edges_scalar_lib(const T *p_cell_in, const int *edge_cell_idx, + const int *edge_cell_blk, const S *coeff_int, + S *p_edge_out, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, + const int *i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblk_c, const int nblks_e, + const int patch_id, const bool l_limited_area, + const bool lfill_latbc, const bool lacc); template <typename T> -void edges2verts_scalar_lib (const T* p_edge_in, const int* vert_edge_idx, const int* vert_edge_blk, - const T* v_int, T* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, const int slev, - const int elev, const int nproma, const int nlev, const int nblks_e, - const int nblks_v, const bool lacc); +void edges2verts_scalar_lib(const T *p_edge_in, const int *vert_edge_idx, + const int *vert_edge_blk, const T *v_int, + T *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_v, + const bool lacc); template <typename T> -void edges2cells_scalar_lib(const T* p_edge_in, const int* edge_idx, const int* edge_blk, - const T* coeff_int, T* p_cell_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_e, const int nblks_c, +void edges2cells_scalar_lib(const T *p_edge_in, const int *edge_idx, + const int *edge_blk, const T *coeff_int, + T *p_cell_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_e, const int nblks_c, const bool lacc); template <typename T, typename S> -void cells2verts_scalar_lib(const T* p_cell_in, const int* vert_cell_idx, const int* 
vert_cell_blk, - const S* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, +void cells2verts_scalar_lib(const T *p_cell_in, const int *vert_cell_idx, + const int *vert_cell_blk, const S *coeff_int, + S *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async); template <typename T, typename S> -void cells2verts_scalar_ri_lib(const T* p_cell_in, const int* vert_cell_idx, const int* vert_cell_blk, - const T* coeff_int, S* p_vert_out, const int i_startblk, const int i_endblk, - const int i_startidx_in, const int i_endidx_in, - const int slev, const int elev, const int nproma, - const int nlev, const int nblks_c, const int nblks_v, +void cells2verts_scalar_ri_lib(const T *p_cell_in, const int *vert_cell_idx, + const int *vert_cell_blk, const T *coeff_int, + S *p_vert_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, + const int elev, const int nproma, const int nlev, + const int nblks_c, const int nblks_v, const bool lacc, const bool acc_async); template <typename T> -void verts2cells_scalar_lib(const T* p_vert_in, const int* cell_index_idx, const int* cell_vertex_blk, - const T* coeff_int, T* p_cell_out, const int nblks_c, const int npromz_c, - const int slev, const int elev, const int nproma, const int nlev, - const int nblks_v, const bool lacc); +void verts2cells_scalar_lib(const T *p_vert_in, const int *cell_index_idx, + const int *cell_vertex_blk, const T *coeff_int, + T *p_cell_out, const int nblks_c, + const int npromz_c, const int slev, const int elev, + const int nproma, const int nlev, const int nblks_v, + const bool lacc); 
template <typename T> -void cell_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, - const T* avg_coeff, T* avg_psi_c, const int i_startblk, const int i_endblk, +void cell_avg_lib(const T *psi_c, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const T *avg_coeff, + T *avg_psi_c, const int i_startblk, const int i_endblk, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const bool lacc); diff --git a/src/interpolation/mo_lib_interpolation_vector.cpp b/src/interpolation/mo_lib_interpolation_vector.cpp index fa1ed32..8e6a28e 100644 --- a/src/interpolation/mo_lib_interpolation_vector.cpp +++ b/src/interpolation/mo_lib_interpolation_vector.cpp @@ -12,108 +12,111 @@ #include "mo_lib_interpolation_vector.hpp" template <typename T> -void edges2cells_vector_lib( - const T* p_vn_in, const T* p_vt_in, - const int* cell_edge_idx, const int* cell_edge_blk, - const T* e_bln_c_u, const T* e_bln_c_v, - T* p_u_out, T* p_v_out, - // Additional integer parameters. - int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, - int slev, int elev, - int nproma, - // Dimensions for the arrays. - int nlev, int nblks_e, int nblks_c) -{ - - // Wrap raw pointers in unmanaged Kokkos Views. - typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; +void edges2cells_vector_lib(const T *p_vn_in, const T *p_vt_in, + const int *cell_edge_idx, const int *cell_edge_blk, + const T *e_bln_c_u, const T *e_bln_c_v, T *p_u_out, + T *p_v_out, + // Additional integer parameters. + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + // Dimensions for the arrays. 
+ int nlev, int nblks_e, int nblks_c) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; - UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e); - UnmanagedConstT3D p_vt_in_view(p_vt_in, nproma, nlev, nblks_e); + UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e); + UnmanagedConstT3D p_vt_in_view(p_vt_in, nproma, nlev, nblks_e); - UnmanagedConstInt3D cell_edge_idx_view(cell_edge_idx, nproma, nblks_c, 3); - UnmanagedConstInt3D cell_edge_blk_view(cell_edge_blk, nproma, nblks_c, 3); + UnmanagedConstInt3D cell_edge_idx_view(cell_edge_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D cell_edge_blk_view(cell_edge_blk, nproma, nblks_c, 3); - UnmanagedConstT3D e_bln_c_u_view(e_bln_c_u, nproma, 6, nblks_c); - UnmanagedConstT3D e_bln_c_v_view(e_bln_c_v, nproma, 6, nblks_c); + UnmanagedConstT3D e_bln_c_u_view(e_bln_c_u, nproma, 6, nblks_c); + UnmanagedConstT3D e_bln_c_v_view(e_bln_c_v, nproma, 6, nblks_c); - UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblks_c); - UnmanagedT3D p_v_out_view(p_v_out, nproma, nlev, nblks_c); + UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblks_c); + UnmanagedT3D p_v_out_view(p_v_out, nproma, nlev, nblks_c); - // Loop over cell blocks as in the original Fortran code. - for (int jb = i_startblk; jb <= i_endblk; ++jb) { - // Call get_indices_c_lib to get inner loop indices for block jb. - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, - jb, i_startblk, i_endblk, - i_startidx, i_endidx); + // Loop over cell blocks as in the original Fortran code. + for (int jb = i_startblk; jb <= i_endblk; ++jb) { + // Call get_indices_c_lib to get inner loop indices for block jb. 
+ int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); - Kokkos::parallel_for("edges2cells_inner", innerPolicy, + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + Kokkos::parallel_for( + "edges2cells_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - // Compute the bilinear interpolation for cell (jc, jk, jb). - p_u_out_view(jc, jk, jb) = - e_bln_c_u_view(jc, 0, jb) * - p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + - e_bln_c_u_view(jc, 1, jb) * - p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + - e_bln_c_u_view(jc, 2, jb) * - p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + - e_bln_c_u_view(jc, 3, jb) * - p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + - e_bln_c_u_view(jc, 4, jb) * - p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1) + - e_bln_c_u_view(jc, 5, jb) * - p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1); + // Compute the bilinear interpolation for cell (jc, jk, jb). 
+ p_u_out_view(jc, jk, jb) = + e_bln_c_u_view(jc, 0, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, + cell_edge_blk_view(jc, jb, 0) - 1) + + e_bln_c_u_view(jc, 1, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, + cell_edge_blk_view(jc, jb, 0) - 1) + + e_bln_c_u_view(jc, 2, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, + cell_edge_blk_view(jc, jb, 1) - 1) + + e_bln_c_u_view(jc, 3, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, + cell_edge_blk_view(jc, jb, 1) - 1) + + e_bln_c_u_view(jc, 4, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, + cell_edge_blk_view(jc, jb, 2) - 1) + + e_bln_c_u_view(jc, 5, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, + cell_edge_blk_view(jc, jb, 2) - 1); - p_v_out_view(jc, jk, jb) = - e_bln_c_v_view(jc, 0, jb) * - p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + - e_bln_c_v_view(jc, 1, jb) * - p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + - e_bln_c_v_view(jc, 2, jb) * - p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + - e_bln_c_v_view(jc, 3, jb) * - p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + - e_bln_c_v_view(jc, 4, jb) * - p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1) + - e_bln_c_v_view(jc, 5, jb) * - p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1); + p_v_out_view(jc, jk, jb) = + e_bln_c_v_view(jc, 0, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, + cell_edge_blk_view(jc, jb, 0) - 1) + + e_bln_c_v_view(jc, 1, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, + cell_edge_blk_view(jc, jb, 0) - 1) + + e_bln_c_v_view(jc, 2, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, + cell_edge_blk_view(jc, jb, 1) - 1) + + e_bln_c_v_view(jc, 3, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, 
jk, + cell_edge_blk_view(jc, jb, 1) - 1) + + e_bln_c_v_view(jc, 4, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, + cell_edge_blk_view(jc, jb, 2) - 1) + + e_bln_c_v_view(jc, 5, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, + cell_edge_blk_view(jc, jb, 2) - 1); }); - // Optionally fence after each block if required. - Kokkos::fence(); - } + // Optionally fence after each block if required. + Kokkos::fence(); + } } -template -void edges2cells_vector_lib<double>( - const double* p_vn_in, const double* p_vt_in, - const int* cell_edge_idx, const int* cell_edge_blk, - const double* e_bln_c_u, const double* e_bln_c_v, - double* p_u_out, double* p_v_out, +template void edges2cells_vector_lib<double>( + const double *p_vn_in, const double *p_vt_in, const int *cell_edge_idx, + const int *cell_edge_blk, const double *e_bln_c_u, const double *e_bln_c_v, + double *p_u_out, double *p_v_out, // Additional integer parameters. - int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, - int slev, int elev, - int nproma, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, // Dimensions for the arrays. int nlev, int nblks_e, int nblks_c); -template -void edges2cells_vector_lib<float>( - const float* p_vn_in, const float* p_vt_in, - const int* cell_edge_idx, const int* cell_edge_blk, - const float* e_bln_c_u, const float* e_bln_c_v, - float* p_u_out, float* p_v_out, +template void edges2cells_vector_lib<float>( + const float *p_vn_in, const float *p_vt_in, const int *cell_edge_idx, + const int *cell_edge_blk, const float *e_bln_c_u, const float *e_bln_c_v, + float *p_u_out, float *p_v_out, // Additional integer parameters. - int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, - int slev, int elev, - int nproma, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, // Dimensions for the arrays. 
int nlev, int nblks_e, int nblks_c); \ No newline at end of file diff --git a/src/interpolation/mo_lib_interpolation_vector.hpp b/src/interpolation/mo_lib_interpolation_vector.hpp index 1ee6c5b..9186997 100644 --- a/src/interpolation/mo_lib_interpolation_vector.hpp +++ b/src/interpolation/mo_lib_interpolation_vector.hpp @@ -22,15 +22,12 @@ // - e_bln_c_u and e_bln_c_v: dimensions [nproma, 6, nblks_c] // - p_u_out and p_v_out: dimensions [nproma, nlev, nblks_c] template <typename T> -void edges2cells_vector_lib( - const T* p_vn_in, const T* p_vt_in, - const int* cell_edge_idx, const int* cell_edge_blk, - const T* e_bln_c_u, const T* e_bln_c_v, - T* p_u_out, T* p_v_out, - // Additional integer parameters. - int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, - int slev, int elev, - int nproma, - // Dimensions for the arrays. - int nlev, int nblks_e, int nblks_c); \ No newline at end of file +void edges2cells_vector_lib(const T *p_vn_in, const T *p_vt_in, + const int *cell_edge_idx, const int *cell_edge_blk, + const T *e_bln_c_u, const T *e_bln_c_v, T *p_u_out, + T *p_v_out, + // Additional integer parameters. + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + // Dimensions for the arrays. 
+ int nlev, int nblks_e, int nblks_c); \ No newline at end of file diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp index 606d1e9..0ee7fa3 100644 --- a/test/c/test_interpolation_scalar.cpp +++ b/test/c/test_interpolation_scalar.cpp @@ -9,145 +9,146 @@ // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- -#include <gtest/gtest.h> +#include "mo_lib_interpolation_scalar.hpp" #include <Kokkos_Core.hpp> +#include <gtest/gtest.h> #include <vector> -#include "mo_lib_interpolation_scalar.hpp" // Free-function helpers for 3D and 4D array sizes (assumed column-major) -template<typename T> -size_t num_elements_3d(int d1, int d2, int d3) { +template <typename T> size_t num_elements_3d(int d1, int d2, int d3) { return static_cast<size_t>(d1) * d2 * d3; } -template<typename T> -size_t num_elements_4d(int d1, int d2, int d3, int d4) { +template <typename T> size_t num_elements_4d(int d1, int d2, int d3, int d4) { return static_cast<size_t>(d1) * d2 * d3 * d4; } // Define a helper struct that holds the two types. -template<typename InT, typename OutT> -struct MixedPrecision { - using in_type = InT; +template <typename InT, typename OutT> struct MixedPrecision { + using in_type = InT; using out_type = OutT; }; // Define the list of type pairs we want to test. 
-typedef ::testing::Types< MixedPrecision<double, double>, - MixedPrecision<double, float>, - MixedPrecision<float, float> > MixedTypes; - -typedef ::testing::Types< MixedPrecision<double, double>, - MixedPrecision<float, double>, - MixedPrecision<float, float> > MixedTypesSP2DP; +typedef ::testing::Types<MixedPrecision<double, double>, + MixedPrecision<double, float>, + MixedPrecision<float, float>> + MixedTypes; +typedef ::testing::Types<MixedPrecision<double, double>, + MixedPrecision<float, double>, + MixedPrecision<float, float>> + MixedTypesSP2DP; // Shared dimensions for all routines and classes class interp_dimensions { public: // Constant dimensions. - static constexpr int nproma = 16; // inner loop length - static constexpr int nlev = 7; // number of vertical levels - static constexpr int nblks_c = 2; // number of cell blocks - static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) - static constexpr int nblks_v = 2; // number of vertex blocks (for rbf arrays and outputs) + static constexpr int nproma = 16; // inner loop length + static constexpr int nlev = 7; // number of vertical levels + static constexpr int nblks_c = 2; // number of cell blocks + static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) + static constexpr int nblks_v = + 2; // number of vertex blocks (for rbf arrays and outputs) // Parameter values. - const int i_startblk = 0; - const int i_endblk = 1; // Test blocks [0, 1] - const int i_startidx = 2; - const int i_endidx = nproma - 3; // Partial range: 2 .. nproma-3 - const int slev = 1; - const int elev = nlev - 1; // Partial vertical range (1 .. nlev-1) - const bool lacc = false; // Not using ACC-specific behavior. - const bool acc_async = false; // No asynchronous execution. + const int i_startblk = 0; + const int i_endblk = 1; // Test blocks [0, 1] + const int i_startidx = 2; + const int i_endidx = nproma - 3; // Partial range: 2 .. 
nproma-3 + const int slev = 1; + const int elev = nlev - 1; // Partial vertical range (1 .. nlev-1) + const bool lacc = false; // Not using ACC-specific behavior. + const bool acc_async = false; // No asynchronous execution. }; template <typename T> -class InterpolationScalarTypedTestFixture : public ::testing::Test, public interp_dimensions { +class InterpolationScalarTypedTestFixture : public ::testing::Test, + public interp_dimensions { public: - // Arrays used for verts2edges - std::vector<T> p_vertex_in; // Dimensions: (nproma, nlev, nblks_v) - std::vector<int> edge_vertex_idx; // Dimensions: (nproma, nblks_e, 4) - std::vector<int> edge_vertex_blk; // Dimensions: (nproma, nblks_e, 4) - std::vector<T> coeff_int_edges; // Dimensions: (nproma, 2, nblks_e) - std::vector<T> p_edge_out; // Dimensions: (nproma, nlev, nblks_e) + std::vector<T> p_vertex_in; // Dimensions: (nproma, nlev, nblks_v) + std::vector<int> edge_vertex_idx; // Dimensions: (nproma, nblks_e, 4) + std::vector<int> edge_vertex_blk; // Dimensions: (nproma, nblks_e, 4) + std::vector<T> coeff_int_edges; // Dimensions: (nproma, 2, nblks_e) + std::vector<T> p_edge_out; // Dimensions: (nproma, nlev, nblks_e) // Arrays used for edges2verts - std::vector<T> p_edge_in; // Dimensions: (nproma, nlev, nblks_e) - std::vector<int> edge_vert_idx; // Dimensions: (nproma, nblks_e, 6) - std::vector<int> edge_vert_blk; // Dimensions: (nproma, nblks_e, 6) - std::vector<T> v_int; // Dimensions: (nproma, 6, nblks_v) - std::vector<T> p_vert_out; // Dimensions: (nproma, nlev, nblks_v) + std::vector<T> p_edge_in; // Dimensions: (nproma, nlev, nblks_e) + std::vector<int> edge_vert_idx; // Dimensions: (nproma, nblks_e, 6) + std::vector<int> edge_vert_blk; // Dimensions: (nproma, nblks_e, 6) + std::vector<T> v_int; // Dimensions: (nproma, 6, nblks_v) + std::vector<T> p_vert_out; // Dimensions: (nproma, nlev, nblks_v) // Arrays used for edges2cells // std::vector<T> p_edge_in; // Dimensions: (nproma, nlev, nblks_e) - 
std::vector<int> edge_idx; // Dimensions: (nproma, nblks_c, 3) - std::vector<int> edge_blk; // Dimensions: (nproma, nblks_c, 3) - std::vector<T> coeff_int_cells; // Dimensions: (nproma, 3, nblks_c) - std::vector<T> p_cell_out; // Dimensions: (nproma, nlev, nblks_c) + std::vector<int> edge_idx; // Dimensions: (nproma, nblks_c, 3) + std::vector<int> edge_blk; // Dimensions: (nproma, nblks_c, 3) + std::vector<T> coeff_int_cells; // Dimensions: (nproma, 3, nblks_c) + std::vector<T> p_cell_out; // Dimensions: (nproma, nlev, nblks_c) // Arrays used for verts2cells - std::vector<T> p_vert_in; // Dimensions: (nproma, nlev, nblks_v) - std::vector<int> cell_index_idx; // Dimensions: (nproma, nblks_c, 3) - std::vector<int> cell_index_blk; // Dimensions: (nproma, nblks_c, 3) - // std::vector<InType> coeff_int; // Dimensions: (nproma, 3, nblks_c) - // std::vector<T> p_cell_out; // Dimensions: (nproma, nlev, nblks_c) + std::vector<T> p_vert_in; // Dimensions: (nproma, nlev, nblks_v) + std::vector<int> cell_index_idx; // Dimensions: (nproma, nblks_c, 3) + std::vector<int> cell_index_blk; // Dimensions: (nproma, nblks_c, 3) // Arrays used for avg_lib - std::vector<T> psi_c; // Dimensions: (nproma, nlev, nblks_c) - std::vector<int> cell_neighbor_idx; // Dimensions: (nproma, nblks_c, 3) - std::vector<int> cell_neighbor_blk; // Dimensions: (nproma, nblks_c, 3) + std::vector<T> psi_c; // Dimensions: (nproma, nlev, nblks_c) + std::vector<int> cell_neighbor_idx; // Dimensions: (nproma, nblks_c, 3) + std::vector<int> cell_neighbor_blk; // Dimensions: (nproma, nblks_c, 3) std::vector<T> avg_coeff; // Dimensions: (nproma, nlev, nblks_c) - std::vector<T> avg_psi_c; // Dimensions: (nproma, nlev, nblks_c) + std::vector<T> avg_psi_c; // Dimensions: (nproma, nlev, nblks_c) const int cell_type = 6; const int npromz_c = 32; InterpolationScalarTypedTestFixture() { // Allocate and initialize arrays needed for verts2edges - p_vertex_in.resize(num_elements_3d<T>(nproma, nlev, nblks_v), 
static_cast<T>(1)); + p_vertex_in.resize(num_elements_3d<T>(nproma, nlev, nblks_v), + static_cast<T>(1)); edge_vertex_idx.resize(num_elements_3d<int>(nproma, nblks_e, 4), 1); edge_vertex_blk.resize(num_elements_3d<int>(nproma, nblks_e, 4), 0); - coeff_int_edges.resize(num_elements_3d<T>(nproma, 2, nblks_e), static_cast<T>(1)); + coeff_int_edges.resize(num_elements_3d<T>(nproma, 2, nblks_e), + static_cast<T>(1)); - p_edge_out.resize(num_elements_3d<T>(nproma, nlev, nblks_e), static_cast<T>(0)); + p_edge_out.resize(num_elements_3d<T>(nproma, nlev, nblks_e), + static_cast<T>(0)); // Allocate & Initialize arrays needed for edges2verts - p_edge_in.resize(num_elements_3d<T>(nproma, nlev, nblks_e), static_cast<T>(1)); + p_edge_in.resize(num_elements_3d<T>(nproma, nlev, nblks_e), + static_cast<T>(1)); edge_vert_idx.resize(num_elements_3d<int>(nproma, nblks_e, 6), 1); edge_vert_blk.resize(num_elements_3d<int>(nproma, nblks_e, 6), 0); v_int.resize(num_elements_3d<T>(nproma, 6, nblks_v), static_cast<T>(1)); - p_vert_out.resize(num_elements_3d<T>(nproma, nlev, nblks_v), static_cast<T>(0)); + p_vert_out.resize(num_elements_3d<T>(nproma, nlev, nblks_v), + static_cast<T>(0)); // Allocate & Initialize arrays needed for edges2cells - // p_edge_in.resize(num_elements_3d<T>(nproma, nlev, nblks_e), static_cast<T>(1)); edge_idx.resize(num_elements_3d<int>(nproma, nblks_c, 3), 1); edge_blk.resize(num_elements_3d<int>(nproma, nblks_c, 3), 0); - coeff_int_cells.resize(num_elements_3d<T>(nproma, 3, nblks_c), static_cast<T>(1)); + coeff_int_cells.resize(num_elements_3d<T>(nproma, 3, nblks_c), + static_cast<T>(1)); - p_cell_out.resize(num_elements_3d<T>(nproma, nlev, nblks_c), static_cast<T>(0)); + p_cell_out.resize(num_elements_3d<T>(nproma, nlev, nblks_c), + static_cast<T>(0)); // Allocate and initialize arrays needed for verts2cells - p_vert_in.resize(num_elements_3d<T>(nproma, nlev, nblks_v), static_cast<T>(1)); + p_vert_in.resize(num_elements_3d<T>(nproma, nlev, nblks_v), + 
static_cast<T>(1)); cell_index_idx.resize(num_elements_3d<int>(nproma, nblks_c, 3), 1); cell_index_blk.resize(num_elements_3d<int>(nproma, nblks_c, 3), 0); - // coeff_int.resize(num_elements_3d<T>(nproma, cell_type, nblks_c), static_cast<T>(1)); - - // Allocate output arrays and initialize to zero. - // p_cell_out.resize(num_elements_3d<T>(nproma, nlev, nblks_c), static_cast<T>(0)); // Allocate and initialize arrays needed for avg_lib psi_c.resize(num_elements_3d<T>(nproma, nlev, nblks_c), static_cast<T>(1)); cell_neighbor_idx.resize(num_elements_3d<int>(nproma, nblks_c, 3), 1); cell_neighbor_blk.resize(num_elements_3d<int>(nproma, nblks_c, 3), 0); - avg_coeff.resize(num_elements_3d<T>(nproma, nlev, nblks_c), static_cast<T>(1)); + avg_coeff.resize(num_elements_3d<T>(nproma, nlev, nblks_c), + static_cast<T>(1)); // Allocate output arrays and initialize to zero. - avg_psi_c.resize(num_elements_3d<T>(nproma, nlev, nblks_c), static_cast<T>(0)); - + avg_psi_c.resize(num_elements_3d<T>(nproma, nlev, nblks_c), + static_cast<T>(0)); } }; @@ -164,22 +165,11 @@ TYPED_TEST_SUITE(InterpolationScalarTypedTestFixture, SingleType); TYPED_TEST(InterpolationScalarTypedTestFixture, Verts2Edges) { verts2edges_scalar_lib<TypeParam>( - this->p_vertex_in.data(), - this->edge_vertex_idx.data(), - this->edge_vertex_blk.data(), - this->coeff_int_edges.data(), - this->p_edge_out.data(), - this->i_startblk, - this->i_endblk, - this->i_startidx, - this->i_endidx, - this->slev, - this->elev, - this->nproma, - this->nlev, - this->nblks_v, - this->nblks_e, - this->lacc); + this->p_vertex_in.data(), this->edge_vertex_idx.data(), + this->edge_vertex_blk.data(), this->coeff_int_edges.data(), + this->p_edge_out.data(), this->i_startblk, this->i_endblk, + this->i_startidx, this->i_endidx, this->slev, this->elev, this->nproma, + this->nlev, this->nblks_v, this->nblks_e, this->lacc); // Check the outputs only for blocks in the range // { [i_startblk, i_endblk], [slev,elev], [i_startidx, i_endidx] } @@ 
-187,10 +177,14 @@ TYPED_TEST(InterpolationScalarTypedTestFixture, Verts2Edges) { for (int level = this->slev; level < this->elev; ++level) { for (int i = this->i_startidx; i < this->i_endidx; ++i) { // Compute the linear index for a 3D array in column-major order: - size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 2 stencil points, expect 2. - EXPECT_NEAR(this->p_edge_out[idx], static_cast<TypeParam>(2), static_cast<TypeParam>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " << i; + size_t idx = + i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 2 stencil points, + // expect 2. + EXPECT_NEAR(this->p_edge_out[idx], static_cast<TypeParam>(2), + static_cast<TypeParam>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; } } } @@ -205,22 +199,11 @@ TYPED_TEST(InterpolationScalarTypedTestFixture, Verts2Edges) { TYPED_TEST(InterpolationScalarTypedTestFixture, Edges2Verts) { edges2verts_scalar_lib<TypeParam>( - this->p_edge_in.data(), - this->edge_vert_idx.data(), - this->edge_vert_blk.data(), - this->v_int.data(), - this->p_vert_out.data(), - this->i_startblk, - this->i_endblk, - this->i_startidx, - this->i_endidx, - this->slev, - this->elev, - this->nproma, - this->nlev, - this->nblks_e, - this->nblks_v, - this->lacc); + this->p_edge_in.data(), this->edge_vert_idx.data(), + this->edge_vert_blk.data(), this->v_int.data(), this->p_vert_out.data(), + this->i_startblk, this->i_endblk, this->i_startidx, this->i_endidx, + this->slev, this->elev, this->nproma, this->nlev, this->nblks_e, + this->nblks_v, this->lacc); // Check the outputs only for blocks in the range // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } @@ -228,10 +211,14 @@ TYPED_TEST(InterpolationScalarTypedTestFixture, Edges2Verts) { for (int level = this->slev; level < this->elev; ++level) { 
for (int i = this->i_startidx; i < this->i_endidx; ++i) { // Compute the linear index for a 3D array in column-major order: - size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 6 stencil points, expect 6. - EXPECT_NEAR(this->p_vert_out[idx], static_cast<TypeParam>(6), static_cast<TypeParam>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " << i; + size_t idx = + i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 6 stencil points, + // expect 6. + EXPECT_NEAR(this->p_vert_out[idx], static_cast<TypeParam>(6), + static_cast<TypeParam>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; } } } @@ -246,22 +233,10 @@ TYPED_TEST(InterpolationScalarTypedTestFixture, Edges2Verts) { TYPED_TEST(InterpolationScalarTypedTestFixture, Edges2Cells) { edges2cells_scalar_lib<TypeParam>( - this->p_edge_in.data(), - this->edge_idx.data(), - this->edge_blk.data(), - this->coeff_int_cells.data(), - this->p_cell_out.data(), - this->i_startblk, - this->i_endblk, - this->i_startidx, - this->i_endidx, - this->slev, - this->elev, - this->nproma, - this->nlev, - this->nblks_e, - this->nblks_c, - this->lacc); + this->p_edge_in.data(), this->edge_idx.data(), this->edge_blk.data(), + this->coeff_int_cells.data(), this->p_cell_out.data(), this->i_startblk, + this->i_endblk, this->i_startidx, this->i_endidx, this->slev, this->elev, + this->nproma, this->nlev, this->nblks_e, this->nblks_c, this->lacc); // Check the outputs only for blocks in the range // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } @@ -269,10 +244,14 @@ TYPED_TEST(InterpolationScalarTypedTestFixture, Edges2Cells) { for (int level = this->slev; level < this->elev; ++level) { for (int i = this->i_startidx; i < this->i_endidx; ++i) { // Compute the linear index for a 3D array in column-major order: - size_t idx = i + 
level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 3 stencil points, expect 3. - EXPECT_NEAR(this->p_cell_out[idx], static_cast<TypeParam>(3), static_cast<TypeParam>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " << i; + size_t idx = + i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 3 stencil points, + // expect 3. + EXPECT_NEAR(this->p_cell_out[idx], static_cast<TypeParam>(3), + static_cast<TypeParam>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; } } } @@ -281,20 +260,10 @@ TYPED_TEST(InterpolationScalarTypedTestFixture, Edges2Cells) { TYPED_TEST(InterpolationScalarTypedTestFixture, Verts2Cells) { verts2cells_scalar_lib<TypeParam>( - this->p_vert_in.data(), - this->cell_index_idx.data(), - this->cell_index_blk.data(), - this->coeff_int_cells.data(), - this->p_cell_out.data(), - this->nblks_c, - this->npromz_c, - this->slev, - this->elev, - this->nproma, - this->nlev, - this->nblks_v, - this->lacc); - + this->p_vert_in.data(), this->cell_index_idx.data(), + this->cell_index_blk.data(), this->coeff_int_cells.data(), + this->p_cell_out.data(), this->nblks_c, this->npromz_c, this->slev, + this->elev, this->nproma, this->nlev, this->nblks_v, this->lacc); // Check the outputs only for blocks in the range // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } @@ -302,10 +271,14 @@ TYPED_TEST(InterpolationScalarTypedTestFixture, Verts2Cells) { for (int level = this->slev; level < this->elev; ++level) { for (int i = this->i_startidx; i < this->i_endidx; ++i) { // Compute the linear index for a 3D array in column-major order: - size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 3 stencil points, expect 3. 
- EXPECT_NEAR(this->p_cell_out[idx], static_cast<TypeParam>(3), static_cast<TypeParam>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " << i; + size_t idx = + i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 3 stencil points, + // expect 3. + EXPECT_NEAR(this->p_cell_out[idx], static_cast<TypeParam>(3), + static_cast<TypeParam>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; } } } @@ -320,23 +293,12 @@ TYPED_TEST(InterpolationScalarTypedTestFixture, Verts2Cells) { TYPED_TEST(InterpolationScalarTypedTestFixture, AvgLib) { // Call the function - cell_avg_lib<TypeParam>( - this->psi_c.data(), - this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), - this->avg_coeff.data(), - this->avg_psi_c.data(), - this->i_startblk, - this->i_endblk, - this->i_startidx, - this->i_endidx, - this->slev, - this->elev, - this->nproma, - this->nlev, - this->nblks_c, - this->lacc); - + cell_avg_lib<TypeParam>(this->psi_c.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), + this->avg_coeff.data(), this->avg_psi_c.data(), + this->i_startblk, this->i_endblk, this->i_startidx, + this->i_endidx, this->slev, this->elev, this->nproma, + this->nlev, this->nblks_c, this->lacc); // Check the outputs only for blocks in the range // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } @@ -344,27 +306,32 @@ TYPED_TEST(InterpolationScalarTypedTestFixture, AvgLib) { for (int level = this->slev; level < this->elev; ++level) { for (int i = this->i_startidx; i < this->i_endidx; ++i) { // Compute the linear index for a 3D array in column-major order: - size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 4 stencil points, expect 4. 
- EXPECT_NEAR(this->avg_psi_c[idx], static_cast<TypeParam>(4), static_cast<TypeParam>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " << i; + size_t idx = + i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 4 stencil points, + // expect 4. + EXPECT_NEAR(this->avg_psi_c[idx], static_cast<TypeParam>(4), + static_cast<TypeParam>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; } } } } template <typename TypePair> -class InterpolationScalarMixedTestFixture : public ::testing::Test, public interp_dimensions { +class InterpolationScalarMixedTestFixture : public ::testing::Test, + public interp_dimensions { public: - using InType = typename TypePair::in_type; + using InType = typename TypePair::in_type; using OutType = typename TypePair::out_type; // Arrays used for cells2edges - std::vector<InType> p_cell_in; // Dimensions: (nproma, nlev, nblks_c) - std::vector<int> edge_cell_idx; // Dimensions: (nproma, nblks_e, 2) - std::vector<int> edge_cell_blk; // Dimensions: (nproma, nblks_e, 2) - std::vector<OutType> coeff_int_edges; // Dimensions: (nproma, 2, nblks_e) - std::vector<OutType> p_edge_out; // Dimensions: (nproma, nlev, nblks_e) + std::vector<InType> p_cell_in; // Dimensions: (nproma, nlev, nblks_c) + std::vector<int> edge_cell_idx; // Dimensions: (nproma, nblks_e, 2) + std::vector<int> edge_cell_blk; // Dimensions: (nproma, nblks_e, 2) + std::vector<OutType> coeff_int_edges; // Dimensions: (nproma, 2, nblks_e) + std::vector<OutType> p_edge_out; // Dimensions: (nproma, nlev, nblks_e) // Further parameters for cells2edges const int patch_id = 0; @@ -376,19 +343,22 @@ public: std::vector<int> i_endidx_in; // Dimensions: (2) // Arrays used for cells2verts - std::vector<int> vert_cell_idx; // Dimensions: (nproma, nblks_v, 6) - std::vector<int> vert_cell_blk; // Dimensions: (nproma, nblks_v, 6) - std::vector<OutType> coeff_int_verts; // 
Dimensions: (nproma, 6, nblks_v) - std::vector<OutType> p_vert_out; // Dimensions: (nproma, nlev, nblks_v) + std::vector<int> vert_cell_idx; // Dimensions: (nproma, nblks_v, 6) + std::vector<int> vert_cell_blk; // Dimensions: (nproma, nblks_v, 6) + std::vector<OutType> coeff_int_verts; // Dimensions: (nproma, 6, nblks_v) + std::vector<OutType> p_vert_out; // Dimensions: (nproma, nlev, nblks_v) InterpolationScalarMixedTestFixture() { // Allocate and initialize arrays needed for cells2edges - p_cell_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_c), static_cast<InType>(1)); + p_cell_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_c), + static_cast<InType>(1)); edge_cell_idx.resize(num_elements_3d<int>(nproma, nblks_e, 2), 1); edge_cell_blk.resize(num_elements_3d<int>(nproma, nblks_e, 2), 0); - coeff_int_edges.resize(num_elements_3d<InType>(nproma, 2, nblks_e), static_cast<OutType>(1)); + coeff_int_edges.resize(num_elements_3d<InType>(nproma, 2, nblks_e), + static_cast<OutType>(1)); - p_edge_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_e), static_cast<OutType>(0)); + p_edge_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_e), + static_cast<OutType>(0)); // Allocate neighbour indexes for cells2edges i_startblk_in.resize(2, i_startblk); @@ -399,9 +369,11 @@ public: // Allocate & Initialize arrays needed for cells2verts vert_cell_idx.resize(num_elements_3d<int>(nproma, nblks_v, 6), 1); vert_cell_blk.resize(num_elements_3d<int>(nproma, nblks_v, 6), 0); - coeff_int_verts.resize(num_elements_3d<InType>(nproma, 6, nblks_v), static_cast<OutType>(1)); + coeff_int_verts.resize(num_elements_3d<InType>(nproma, 6, nblks_v), + static_cast<OutType>(1)); - p_vert_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); + p_vert_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), + static_cast<OutType>(0)); } }; @@ -414,30 +386,18 @@ TYPED_TEST_SUITE(InterpolationScalarMixedTestFixture, MixedTypesSP2DP); 
//////////////////////////////////////////////////////////////////////////////// TYPED_TEST(InterpolationScalarMixedTestFixture, cells2edges) { - using InType = typename TestFixture::InType; + using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; // Call the function - cells2edges_scalar_lib<InType,OutType>( - this->p_cell_in.data(), - this->edge_cell_idx.data(), - this->edge_cell_blk.data(), - this->coeff_int_edges.data(), - this->p_edge_out.data(), - this->i_startblk_in.data(), - this->i_endblk_in.data(), - this->i_startidx_in.data(), - this->i_endidx_in.data(), - this->slev, - this->elev, - this->nproma, - this->nlev, - this->nblks_c, - this->nblks_e, - this->patch_id, - this->l_limited_area, - this->lfill_latbc, - this->lacc); + cells2edges_scalar_lib<InType, OutType>( + this->p_cell_in.data(), this->edge_cell_idx.data(), + this->edge_cell_blk.data(), this->coeff_int_edges.data(), + this->p_edge_out.data(), this->i_startblk_in.data(), + this->i_endblk_in.data(), this->i_startidx_in.data(), + this->i_endidx_in.data(), this->slev, this->elev, this->nproma, + this->nlev, this->nblks_c, this->nblks_e, this->patch_id, + this->l_limited_area, this->lfill_latbc, this->lacc); // Check the outputs only for blocks in the range // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } @@ -445,10 +405,14 @@ TYPED_TEST(InterpolationScalarMixedTestFixture, cells2edges) { for (int level = this->slev; level < this->elev; ++level) { for (int i = this->i_startidx; i < this->i_endidx; ++i) { // Compute the linear index for a 3D array in column-major order: - size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 2 stencil points, expect 2. 
- EXPECT_NEAR(this->p_edge_out[idx], static_cast<OutType>(2), static_cast<OutType>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " << i; + size_t idx = + i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 2 stencil points, + // expect 2. + EXPECT_NEAR(this->p_edge_out[idx], static_cast<OutType>(2), + static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; } } } @@ -461,27 +425,15 @@ TYPED_TEST(InterpolationScalarMixedTestFixture, cells2edges) { //////////////////////////////////////////////////////////////////////////////// TYPED_TEST(InterpolationScalarMixedTestFixture, cells2verts) { - using InType = typename TestFixture::InType; + using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; - cells2verts_scalar_lib<InType,OutType>( - this->p_cell_in.data(), - this->vert_cell_idx.data(), - this->vert_cell_blk.data(), - this->coeff_int_verts.data(), - this->p_vert_out.data(), - this->i_startblk, - this->i_endblk, - this->i_startidx, - this->i_endidx, - this->slev, - this->elev, - this->nproma, - this->nlev, - this->nblks_c, - this->nblks_v, - this->lacc, - this->acc_async); + cells2verts_scalar_lib<InType, OutType>( + this->p_cell_in.data(), this->vert_cell_idx.data(), + this->vert_cell_blk.data(), this->coeff_int_verts.data(), + this->p_vert_out.data(), this->i_startblk, this->i_endblk, + this->i_startidx, this->i_endidx, this->slev, this->elev, this->nproma, + this->nlev, this->nblks_c, this->nblks_v, this->lacc, this->acc_async); // Check the outputs only for blocks in the range // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } @@ -489,10 +441,14 @@ TYPED_TEST(InterpolationScalarMixedTestFixture, cells2verts) { for (int level = this->slev; level < this->elev; ++level) { for (int i = this->i_startidx; i < this->i_endidx; ++i) { // Compute the linear index for a 3D array 
in column-major order: - size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 6 stencil points, expect 6. - EXPECT_NEAR(this->p_vert_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " << i; + size_t idx = + i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 6 stencil points, + // expect 6. + EXPECT_NEAR(this->p_vert_out[idx], static_cast<OutType>(6), + static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; } } } @@ -504,30 +460,35 @@ TYPED_TEST(InterpolationScalarMixedTestFixture, cells2verts) { // //////////////////////////////////////////////////////////////////////////////// -// The test for cells2verts_ri is similar to cells2verts, but is done here separtely -// to avoid as a differebt template instantiation is needed for the function call +// The test for cells2verts_ri is similar to cells2verts, but is done here +// separtely to avoid as a differebt template instantiation is needed for the +// function call template <typename Types> -class Cells2vertsriScalarLibTestFixture : public testing::Test, public interp_dimensions{ +class Cells2vertsriScalarLibTestFixture : public testing::Test, + public interp_dimensions { public: - using InType = typename Types::in_type; - using OutType = typename Types::out_type; + using InType = typename Types::in_type; + using OutType = typename Types::out_type; // Arrays stored in std::vector. 
- std::vector<InType> p_cell_in; // Dimensions: (nproma, nlev, nblks_c) - std::vector<int> vert_cell_idx; // Dimensions: (nproma, nblks_v, 6) - std::vector<int> vert_cell_blk; // Dimensions: (nproma, nblks_v, 6) - std::vector<InType> coeff_int; // Dimensions: (nproma, 6, nblks_v) - std::vector<OutType> p_vert_out; // Dimensions: (nproma, nlev, nblks_v) + std::vector<InType> p_cell_in; // Dimensions: (nproma, nlev, nblks_c) + std::vector<int> vert_cell_idx; // Dimensions: (nproma, nblks_v, 6) + std::vector<int> vert_cell_blk; // Dimensions: (nproma, nblks_v, 6) + std::vector<InType> coeff_int; // Dimensions: (nproma, 6, nblks_v) + std::vector<OutType> p_vert_out; // Dimensions: (nproma, nlev, nblks_v) Cells2vertsriScalarLibTestFixture() { // Allocate and initialize inputs. - p_cell_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_c), static_cast<InType>(1)); + p_cell_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_c), + static_cast<InType>(1)); vert_cell_idx.resize(num_elements_3d<int>(nproma, nblks_v, 6), 1); vert_cell_blk.resize(num_elements_3d<int>(nproma, nblks_v, 6), 0); - coeff_int.resize(num_elements_3d<InType>(nproma, 6, nblks_v), static_cast<InType>(1)); + coeff_int.resize(num_elements_3d<InType>(nproma, 6, nblks_v), + static_cast<InType>(1)); // Allocate output arrays and initialize to zero. 
- p_vert_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); + p_vert_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), + static_cast<OutType>(0)); } }; @@ -536,28 +497,16 @@ TYPED_TEST_SUITE(Cells2vertsriScalarLibTestFixture, MixedTypes); // Add test TYPED_TEST(Cells2vertsriScalarLibTestFixture, cells2verts_ri) { - using InType = typename TestFixture::InType; - using OutType = typename TestFixture::OutType; + using InType = typename TestFixture::InType; + using OutType = typename TestFixture::OutType; // Call the function - cells2verts_scalar_ri_lib<InType,OutType>( - this->p_cell_in.data(), - this->vert_cell_idx.data(), - this->vert_cell_blk.data(), - this->coeff_int.data(), - this->p_vert_out.data(), - this->i_startblk, - this->i_endblk, - this->i_startidx, - this->i_endidx, - this->slev, - this->elev, - this->nproma, - this->nlev, - this->nblks_c, - this->nblks_v, - this->lacc, - this->acc_async); + cells2verts_scalar_ri_lib<InType, OutType>( + this->p_cell_in.data(), this->vert_cell_idx.data(), + this->vert_cell_blk.data(), this->coeff_int.data(), + this->p_vert_out.data(), this->i_startblk, this->i_endblk, + this->i_startidx, this->i_endidx, this->slev, this->elev, this->nproma, + this->nlev, this->nblks_c, this->nblks_v, this->lacc, this->acc_async); // Check the outputs only for blocks in the range // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } @@ -566,13 +515,17 @@ TYPED_TEST(Cells2vertsriScalarLibTestFixture, cells2verts_ri) { for (int i = this->i_startidx; i < this->i_endidx; ++i) { // Compute the linear index for a 3D array in column-major order: #ifdef __LOOP_EXCHANGE - size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; + size_t idx = + i + level * this->nproma + block * this->nproma * this->nlev; #else size_t idx = level + i * this->nlev + block * this->nproma * this->nlev; #endif - // Since every contribution is 1 and there are 6 stencil points, expect 6. 
- EXPECT_NEAR(this->p_vert_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " << i; + // Since every contribution is 1 and there are 6 stencil points, + // expect 6. + EXPECT_NEAR(this->p_vert_out[idx], static_cast<OutType>(6), + static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; } } } diff --git a/test/c/test_interpolation_vector.cpp b/test/c/test_interpolation_vector.cpp index 974efcd..680fb6e 100644 --- a/test/c/test_interpolation_vector.cpp +++ b/test/c/test_interpolation_vector.cpp @@ -9,30 +9,31 @@ // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- -#include <gtest/gtest.h> #include <Kokkos_Core.hpp> +#include <gtest/gtest.h> #include <vector> #include "mo_lib_interpolation_vector.hpp" // Dimensions for the test (small, trivial test). -// We assume Fortran ordering: column-major, but our C wrappers will wrap raw pointers into Kokkos::Views with LayoutLeft. +// We assume Fortran ordering: column-major, but our C wrappers will wrap raw +// pointers into Kokkos::Views with LayoutLeft. constexpr int nproma = 2; -constexpr int nlev = 3; -constexpr int nblks_e = 2; // For the edge arrays (p_vn_in, p_vt_in) -constexpr int nblks_c = 2; // For the cell arrays and interpolation coefficients +constexpr int nlev = 3; +constexpr int nblks_e = 2; // For the edge arrays (p_vn_in, p_vt_in) +constexpr int nblks_c = 2; // For the cell arrays and interpolation coefficients // For the get_indices_c_lib inputs. -constexpr int i_startblk = 0; -constexpr int i_endblk = 1; // two blocks: indices 0 and 1 +constexpr int i_startblk = 0; +constexpr int i_endblk = 1; // two blocks: indices 0 and 1 constexpr int i_startidx_in = 0; -constexpr int i_endidx_in = nproma - 1; // 0 and 1 -constexpr int slev = 0; -constexpr int elev = nlev - 1; // 0 .. 
2 +constexpr int i_endidx_in = nproma - 1; // 0 and 1 +constexpr int slev = 0; +constexpr int elev = nlev - 1; // 0 .. 2 -// Helper to compute total number of elements for a 3D array stored in column-major order. -template<typename T> -size_t num_elements(int dim1, int dim2, int dim3) { +// Helper to compute total number of elements for a 3D array stored in +// column-major order. +template <typename T> size_t num_elements(int dim1, int dim2, int dim3) { return static_cast<size_t>(dim1) * dim2 * dim3; } @@ -47,12 +48,13 @@ TEST(Edges2CellsTest, DPTest) { // Here we set cell_edge_idx to 1, 2, 1 for every triple. for (int i = 0; i < num_elements<int>(nproma, nblks_c, 3); i += 3) { - cell_edge_idx[i] = 1; - cell_edge_idx[i+1] = 2; - cell_edge_idx[i+2] = 1; + cell_edge_idx[i] = 1; + cell_edge_idx[i + 1] = 2; + cell_edge_idx[i + 2] = 1; } - // Similarly, set cell_edge_blk to all ones (valid since nblks_e=2, so index 1 means block 0 after subtracting 1). - // e_bln_c_u and e_bln_c_v: dimensions [nproma, 6, nblks_c] + // Similarly, set cell_edge_blk to all ones (valid since nblks_e=2, so index 1 + // means block 0 after subtracting 1). e_bln_c_u and e_bln_c_v: dimensions + // [nproma, 6, nblks_c] std::vector<double> e_bln_c_u(num_elements<double>(nproma, 6, nblks_c), 1.0); std::vector<double> e_bln_c_v(num_elements<double>(nproma, 6, nblks_c), 1.0); // Output arrays: dimensions [nproma, nlev, nblks_c] @@ -64,15 +66,10 @@ TEST(Edges2CellsTest, DPTest) { // Call the dp (double precision) version. 
edges2cells_vector_lib<double>( - p_vn_in.data(), p_vt_in.data(), - cell_edge_idx.data(), cell_edge_blk.data(), - e_bln_c_u.data(), e_bln_c_v.data(), - p_u_out.data(), p_v_out.data(), - i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, - nproma, - nlev, nblks_e, nblks_c); + p_vn_in.data(), p_vt_in.data(), cell_edge_idx.data(), + cell_edge_blk.data(), e_bln_c_u.data(), e_bln_c_v.data(), p_u_out.data(), + p_v_out.data(), i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, + elev, nproma, nlev, nblks_e, nblks_c); // Check that for each computed cell in p_u_out and p_v_out, the value is 6. // This is because for each cell, the kernel adds 6 terms of 1*1. @@ -91,9 +88,9 @@ TEST(Edges2CellsTest, SPTest) { std::vector<int> cell_edge_blk(num_elements<int>(nproma, nblks_c, 3), 1); // Set cell_edge_idx values to 1, 2, 1. for (int i = 0; i < num_elements<int>(nproma, nblks_c, 3); i += 3) { - cell_edge_idx[i] = 1; - cell_edge_idx[i+1] = 2; - cell_edge_idx[i+2] = 1; + cell_edge_idx[i] = 1; + cell_edge_idx[i + 1] = 2; + cell_edge_idx[i + 2] = 1; } std::vector<float> e_bln_c_u(num_elements<float>(nproma, 6, nblks_c), 1.0f); std::vector<float> e_bln_c_v(num_elements<float>(nproma, 6, nblks_c), 1.0f); @@ -105,15 +102,10 @@ TEST(Edges2CellsTest, SPTest) { // Call the sp (float precision) version. edges2cells_vector_lib<float>( - p_vn_in.data(), p_vt_in.data(), - cell_edge_idx.data(), cell_edge_blk.data(), - e_bln_c_u.data(), e_bln_c_v.data(), - p_u_out.data(), p_v_out.data(), - i_startblk, i_endblk, - i_startidx_in, i_endidx_in, - slev, elev, - nproma, - nlev, nblks_e, nblks_c); + p_vn_in.data(), p_vt_in.data(), cell_edge_idx.data(), + cell_edge_blk.data(), e_bln_c_u.data(), e_bln_c_v.data(), p_u_out.data(), + p_v_out.data(), i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, + elev, nproma, nlev, nblks_e, nblks_c); // Verify that every computed output equals 6. for (size_t idx = 0; idx < p_u_out.size(); ++idx) { -- GitLab