diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index 73e582ccb8a333a6aedf0ce2cca7cf45862f4067..9455f9ec92a36507515fe334673f6e405b1ae9de 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -13,6 +13,7 @@ add_library( iconmath-interpolation mo_lib_interpolation_scalar.F90 mo_lib_interpolation_vector.F90 + mo_lib_interpolation_vector.cpp mo_lib_intp_rbf.F90) add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation) @@ -55,10 +56,20 @@ target_include_directories( $<INSTALL_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:$<INSTALL_PREFIX>/${CMAKE_INSTALL_INCLUDEDIR}>> # Path to internal include directory $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:${PROJECT_SOURCE_DIR}/include>> + # Path to the internal C/C++ headers (for testing): Requires CMake 3.15+ for + # multiple compile languages + # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html + $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_SOURCE_DIR}>> + PRIVATE + # Path to config.h (for C and C++ only): Requires CMake 3.15+ for multiple + # compile languages + # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html + $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_BINARY_DIR}>> ) target_link_libraries(iconmath-interpolation PUBLIC fortran-support::fortran-support) target_link_libraries(iconmath-interpolation PUBLIC iconmath-support) +target_link_libraries(iconmath-interpolation PRIVATE Kokkos::kokkos) install(TARGETS iconmath-interpolation EXPORT "${PROJECT_NAME}-targets") diff --git a/src/interpolation/mo_lib_interpolation_vector.cpp b/src/interpolation/mo_lib_interpolation_vector.cpp new file mode 100644 index 0000000000000000000000000000000000000000..50772e7af96ea43e9405eb64b90292a95ef3b2c9 --- /dev/null +++ b/src/interpolation/mo_lib_interpolation_vector.cpp @@ -0,0 +1,130 @@ +#include "mo_lib_loopindices.hpp" +#include "mo_lib_interpolation_vector.hpp" + +// The templated C++ function using Kokkos. +// Raw pointer arguments are wrapped into unmanaged Kokkos::Views. +// Note: The dimensions below must match the Fortran arrays. +// - p_vn_in and p_vt_in: dimensions [nproma, nlev, nblks_e] +// - cell_edge_idx and cell_edge_blk: dimensions [nproma, nblks_c, 3] +// - e_bln_c_u and e_bln_c_v: dimensions [nproma, 6, nblks_c] +// - p_u_out and p_v_out: dimensions [nproma, nlev, nblks_c] +template <typename T> +void edges2cells_vector_lib( + const T* p_vn_in, const T* p_vt_in, + const int* cell_edge_idx, const int* cell_edge_blk, + const T* e_bln_c_u, const T* e_bln_c_v, + T* p_u_out, T* p_v_out, + // Additional integer parameters. + int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, + int slev, int elev, + int nproma, + // Dimensions for the arrays. + int nlev, int nblks_e, int nblks_c) +{ + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + + UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e); + UnmanagedConstT3D p_vt_in_view(p_vt_in, nproma, nlev, nblks_e); + + UnmanagedConstInt3D cell_edge_idx_view(cell_edge_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D cell_edge_blk_view(cell_edge_blk, nproma, nblks_c, 3); + + UnmanagedConstT3D e_bln_c_u_view(e_bln_c_u, nproma, 6, nblks_c); + UnmanagedConstT3D e_bln_c_v_view(e_bln_c_v, nproma, 6, nblks_c); + + UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblks_c); + UnmanagedT3D p_v_out_view(p_v_out, nproma, nlev, nblks_c); + + // Loop over cell blocks as in the original Fortran code. + for (int jb = i_startblk; jb <= i_endblk; ++jb) { + // Call get_indices_c_lib to get inner loop indices for block jb. + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, + jb, i_startblk, i_endblk, + i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + Kokkos::parallel_for("edges2cells_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + // Compute the bilinear interpolation for cell (jc, jk, jb). + p_u_out_view(jc, jk, jb) = + e_bln_c_u_view(jc, 0, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + + e_bln_c_u_view(jc, 1, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + + e_bln_c_u_view(jc, 2, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + + e_bln_c_u_view(jc, 3, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + + e_bln_c_u_view(jc, 4, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1) + + e_bln_c_u_view(jc, 5, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1); + + p_v_out_view(jc, jk, jb) = + e_bln_c_v_view(jc, 0, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + + e_bln_c_v_view(jc, 1, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) + + e_bln_c_v_view(jc, 2, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + + e_bln_c_v_view(jc, 3, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) + + e_bln_c_v_view(jc, 4, jb) * + p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1) + + e_bln_c_v_view(jc, 5, jb) * + p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1); + }); + // Optionally fence after each block if required. + Kokkos::fence(); + } +} + +extern "C" void edges2cells_vector_lib_dp( + const double* p_vn_in, const double* p_vt_in, + const int* cell_edge_idx, const int* cell_edge_blk, + const double* e_bln_c_u, const double* e_bln_c_v, + double* p_u_out, double* p_v_out, + int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, + int slev, int elev, + int nproma, + int nlev, int nblks_e, int nblks_c) +{ + edges2cells_vector_lib<double>(p_vn_in, p_vt_in, + cell_edge_idx, cell_edge_blk, + e_bln_c_u, e_bln_c_v, + p_u_out, p_v_out, + i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, + nproma, + nlev, nblks_e, nblks_c); +} + +extern "C" void edges2cells_vector_lib_sp( + const float* p_vn_in, const float* p_vt_in, + const int* cell_edge_idx, const int* cell_edge_blk, + const float* e_bln_c_u, const float* e_bln_c_v, + float* p_u_out, float* p_v_out, + int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, + int slev, int elev, + int nproma, + int nlev, int nblks_e, int nblks_c) +{ + edges2cells_vector_lib<float>(p_vn_in, p_vt_in, + cell_edge_idx, cell_edge_blk, + e_bln_c_u, e_bln_c_v, + p_u_out, p_v_out, + i_startblk, i_endblk, + i_startidx_in, i_endidx_in, + slev, elev, + nproma, + nlev, nblks_e, nblks_c); +} diff --git a/src/interpolation/mo_lib_interpolation_vector.hpp b/src/interpolation/mo_lib_interpolation_vector.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a764ada5dddfbd65e43038a8bfe678ead7ebbb6c --- /dev/null +++ b/src/interpolation/mo_lib_interpolation_vector.hpp @@ -0,0 +1,38 @@ +#include <Kokkos_Core.hpp> +#include <vector> + +template <typename T> +void edges2cells_vector_lib( + const T* p_vn_in, const T* p_vt_in, + const int* cell_edge_idx, const int* cell_edge_blk, + const T* e_bln_c_u, const T* e_bln_c_v, + T* p_u_out, T* p_v_out, + // Additional integer parameters. + int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, + int slev, int elev, + int nproma, + // Dimensions for the arrays. + int nlev, int nblks_e, int nblks_c); + +extern "C" void edges2cells_vector_lib_dp( + const double* p_vn_in, const double* p_vt_in, + const int* cell_edge_idx, const int* cell_edge_blk, + const double* e_bln_c_u, const double* e_bln_c_v, + double* p_u_out, double* p_v_out, + int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, + int slev, int elev, + int nproma, + int nlev, int nblks_e, int nblks_c); + +extern "C" void edges2cells_vector_lib_sp( + const float* p_vn_in, const float* p_vt_in, + const int* cell_edge_idx, const int* cell_edge_blk, + const float* e_bln_c_u, const float* e_bln_c_v, + float* p_u_out, float* p_v_out, + int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, + int slev, int elev, + int nproma, + int nlev, int nblks_e, int nblks_c); diff --git a/src/support/mo_lib_loopindices.hpp b/src/support/mo_lib_loopindices.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d53aa38b490e4ee1577ac6c0cf8376885c624841 --- /dev/null +++ b/src/support/mo_lib_loopindices.hpp @@ -0,0 +1,11 @@ +extern "C" { + // get_indices_c_lib function + void get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out); + + void get_indices_e_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out); + + void get_indices_v_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk, + int &i_startidx_out, int &i_endidx_out); +}