Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • icon-libraries/libiconmath
1 result
Show changes
Commits on Source (40)
Showing
with 2695 additions and 93 deletions
......@@ -37,3 +37,4 @@ with section("lint"):
local_var_pattern = '[a-zA-Z][0-9a-zA-z_]+'
private_var_pattern = '[a-z][a-z0-9_]+'
public_var_pattern = '[A-Z][0-9a-zA-Z_]+'
global_var_pattern = '[A-Z][0-9a-zA-Z_]+'
......@@ -8,6 +8,12 @@ cmake_install.cmake
iconmath-config-version.cmake
iconmath-config.cmake
iconmath-targets.cmake
KokkosConfig.cmake
KokkosConfigVersion.cmake
KokkosTargets.cmake
KokkosConfigCommon.cmake
Kokkos_Version_Info.cpp
Kokkos_Version_Info.hpp
# Build stage files:
*.L
......@@ -22,4 +28,5 @@ iconmath_Tests
# Test stage files:
/**/Testing/*
iconmath_Tests
run_tests.f90
......@@ -14,7 +14,9 @@ cmake_minimum_required(VERSION 3.18)
project(
iconmath
VERSION 1.0.0
LANGUAGES Fortran)
LANGUAGES Fortran CXX)
set(CMAKE_CXX_STANDARD 17)
option(BUILD_SHARED_LIBS "Build shared libraries" ON)
option(BUILD_TESTING "Build tests" ON)
......@@ -23,9 +25,11 @@ option(BUILD_ICONMATH_HORIZONTAL "Build horizontal library" ON)
option(IM_ENABLE_MIXED_PRECISION "Enable mixed precision" OFF)
option(IM_ENABLE_LOOP_EXCHANGE "Enable loop exchange" OFF)
option(IM_USE_CPP_BINDINGS "Use C++ bindings" OFF)
option(IM_ENABLE_DIM_SWAP "Enable dimension swap" OFF)
option(IM_ENABLE_OPENACC "Enable OpenACC support" OFF)
option(IM_ENABLE_OPENMP "Enable OpenMP support" OFF)
set(IM_ENABLE_GPU OFF CACHE STRING "Enable Kokkos GPU support for arch. Valid values: OFF, nvidia-sm80")
# GNUInstallDirs issues a warning if CMAKE_SIZEOF_VOID_P is not defined, which
# is the case with NAG. One way to circumvent that is to enable C language for
......@@ -107,6 +111,32 @@ else()
endif()
endif()
include(FetchContent)
# configure kokkos 4.4 repository link
FetchContent_Declare(
kokkos
URL https://github.com/kokkos/kokkos/releases/download/4.4.01/kokkos-4.4.01.tar.gz
URL_HASH MD5=eafd0d42c9831858aa84fde78576644c)
# disable build of C++23 mdspan experimental support for now
set(Kokkos_ENABLE_IMPL_MDSPAN OFF CACHE BOOL "Experimental mdspan support")
# by default, build the Kokkos serial backend for CPU
set(Kokkos_ENABLE_SERIAL ON CACHE BOOL "Kokkos Serial backend")
set(Kokkos_ARCH_NATIVE ON CACHE BOOL "Kokkos native architecture optimisations")
if ("${IM_ENABLE_GPU}" STREQUAL "nvidia-sm80")
# NVIDIA A100
set(Kokkos_ENABLE_CUDA ON CACHE BOOL "Kokkos CUDA backend")
set(Kokkos_ARCH_AMPERE80 ON CACHE BOOL "CUDA architecture: Ampere cc80")
endif()
if (${IM_ENABLE_OPENMP})
set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "Kokkos OpenMP backend")
endif()
FetchContent_MakeAvailable(kokkos)
add_subdirectory(src)
# Allow for 'make test' even if the tests are disabled:
......
......@@ -2,6 +2,9 @@
extend-ignore-re = [
".*_pn",
]
extend-ignore-words-re = [
"Comput",
]
[default.extend-words]
Wirth = "Wirth" # author name
......
......@@ -39,7 +39,9 @@ if(IM_ENABLE_OPENACC)
# If _OPENACC is defined, assume that the required compiler flags are already
# provided, e.g. in CMAKE_Fortran_FLAGS:
if(NOT HAS_OPENACC_MACRO)
target_compile_options(iconmath-horizontal PRIVATE ${OpenACC_Fortran_OPTIONS})
target_compile_options(iconmath-horizontal
PRIVATE
$<$<COMPILE_LANGUAGE:Fortran>:${OpenACC_Fortran_OPTIONS}>)
# This make sures that unit tests (FortUTF) compiles without the need of
# passing OpenACC compile option.
target_link_libraries(iconmath-horizontal PRIVATE OpenACC::OpenACC_Fortran)
......@@ -52,11 +54,22 @@ target_include_directories(
# Path to the Fortran modules:
$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:${Fortran_MODULE_DIRECTORY}>>
$<INSTALL_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:$<INSTALL_PREFIX>/${CMAKE_INSTALL_INCLUDEDIR}>>
# Path to the internal C/C++ headers (for testing): Requires CMake 3.15+ for
# multiple compile languages
# https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html
$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_SOURCE_DIR}>>
PRIVATE
# Path to config.h (for C and C++ only): Requires CMake 3.15+ for multiple
# compile languages
# https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html
$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_BINARY_DIR}>>
)
target_link_libraries(iconmath-horizontal PUBLIC fortran-support::fortran-support)
target_link_libraries(iconmath-horizontal PUBLIC iconmath-support)
target_link_libraries(iconmath-horizontal PUBLIC iconmath-interpolation)
target_link_libraries(iconmath-horizontal PRIVATE Kokkos::kokkos)
set_target_properties(iconmath-horizontal PROPERTIES LINKER_LANGUAGE Fortran)
install(TARGETS iconmath-horizontal EXPORT "${PROJECT_NAME}-targets")
......
......@@ -12,8 +12,12 @@
add_library(
iconmath-interpolation
mo_lib_interpolation_scalar.F90
mo_lib_interpolation_scalar.cpp
mo_lib_interpolation_vector.F90
mo_lib_intp_rbf.F90)
mo_lib_interpolation_vector.cpp
mo_lib_intp_rbf.F90
mo_lib_intp_rbf.cpp
interpolation_bindings.cpp)
add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation)
......@@ -40,7 +44,8 @@ if(IM_ENABLE_OPENACC)
# provided, e.g. in CMAKE_Fortran_FLAGS:
if(NOT HAS_OPENACC_MACRO)
target_compile_options(iconmath-interpolation
PRIVATE ${OpenACC_Fortran_OPTIONS})
PRIVATE
$<$<COMPILE_LANGUAGE:Fortran>:${OpenACC_Fortran_OPTIONS}>)
# This make sures that unit tests (FortUTF) compiles without the need of
# passing OpenACC compile option.
target_link_libraries(iconmath-interpolation PRIVATE OpenACC::OpenACC_Fortran)
......@@ -55,10 +60,21 @@ target_include_directories(
$<INSTALL_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:$<INSTALL_PREFIX>/${CMAKE_INSTALL_INCLUDEDIR}>>
# Path to internal include directory
$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:${PROJECT_SOURCE_DIR}/include>>
# Path to the internal C/C++ headers (for testing): Requires CMake 3.15+ for
# multiple compile languages
# https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html
$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_SOURCE_DIR}>>
PRIVATE
# Path to config.h (for C and C++ only): Requires CMake 3.15+ for multiple
# compile languages
# https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html
$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_BINARY_DIR}>>
)
target_link_libraries(iconmath-interpolation PUBLIC fortran-support::fortran-support)
target_link_libraries(iconmath-interpolation PUBLIC iconmath-support)
target_link_libraries(iconmath-interpolation PRIVATE Kokkos::kokkos)
set_target_properties(iconmath-interpolation PROPERTIES LINKER_LANGUAGE Fortran)
install(TARGETS iconmath-interpolation EXPORT "${PROJECT_NAME}-targets")
......
This diff is collapsed.
// ICON
//
// ---------------------------------------------------------------
// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
// Contact information: icon-model.org
//
// See AUTHORS.TXT for a list of authors
// See LICENSES/ for license information
// SPDX-License-Identifier: BSD-3-Clause
// ---------------------------------------------------------------
#pragma once
extern "C" {
// mo_lib_interpolation_vector.F90
void edges2cells_vector_lib_dp(const double *p_vn_in, const double *p_vt_in,
const int *cell_edge_idx,
const int *cell_edge_blk,
const double *e_bln_c_u, const double *e_bln_c_v,
double *p_u_out, double *p_v_out, int i_startblk,
int i_endblk, int i_startidx_in, int i_endidx_in,
int slev, int elev, int nproma, int nlev,
int nblks_e, int nblks_c);
void edges2cells_vector_lib_sp(const float *p_vn_in, const float *p_vt_in,
const int *cell_edge_idx,
const int *cell_edge_blk, const float *e_bln_c_u,
const float *e_bln_c_v, float *p_u_out,
float *p_v_out, int i_startblk, int i_endblk,
int i_startidx_in, int i_endidx_in, int slev,
int elev, int nproma, int nlev, int nblks_e,
int nblks_c);
// mo_lib_interpolation_scalar.F90
void verts2edges_scalar_lib_dp(
const double *p_vertex_in, const int *edge_vertex_idx,
const int *edge_vertex_blk, const double *coeff_int, double *p_edge_out,
const int i_startblk, const int i_endblk, const int i_startidx_in,
const int i_endidx_in, const int slev, const int elev, const int nproma,
const int nlev, const int nblks_v, const int nblks_e, const bool lacc);
void verts2edges_scalar_lib_sp(
const float *p_vertex_in, const int *edge_vertex_idx,
const int *edge_vertex_blk, const float *coeff_int, float *p_edge_out,
const int i_startblk, const int i_endblk, const int i_startidx_in,
const int i_endidx_in, const int slev, const int elev, const int nproma,
const int nlev, const int nblks_v, const int nblks_e, const bool lacc);
void cells2edges_scalar_lib_dp(
const double *p_cell_in, const int *edge_cell_idx, const int *edge_cell_blk,
const double *coeff_int, double *p_edge_out, const int *i_startblk_in,
const int *i_endblk_in, const int *i_startidx_in, const int *i_endidx_in,
const int slev, const int elev, const int nproma, const int nlev,
const int nblk_c, const int nblks_e, const int patch_id,
const bool l_limited_area, const bool lfill_latbc, const bool lacc);
void cells2edges_scalar_lib_sp(const float *p_cell_in, const int *edge_cell_idx,
const int *edge_cell_blk, const float *coeff_int,
float *p_edge_out, const int *i_startblk_in,
const int *i_endblk_in, const int *i_startidx_in,
const int *i_endidx_in, const int slev,
const int elev, const int nproma, const int nlev,
const int nblk_c, const int nblks_e,
const int patch_id, const bool l_limited_area,
const bool lfill_latbc, const bool lacc);
void cells2edges_scalar_lib_sp2dp(
const float *p_cell_in, const int *edge_cell_idx, const int *edge_cell_blk,
const double *coeff_int, double *p_edge_out, const int *i_startblk_in,
const int *i_endblk_in, const int *i_startidx_in, const int *i_endidx_in,
const int slev, const int elev, const int nproma, const int nlev,
const int nblk_c, const int nblks_e, const int patch_id,
const bool l_limited_area, const bool lfill_latbc, const bool lacc);
void edges2verts_scalar_lib_dp(
const double *p_edge_in, const int *vert_edge_idx, const int *vert_edge_blk,
const double *v_int, double *p_vert_out, const int i_startblk,
const int i_endblk, const int i_startidx_in, const int i_endidx_in,
const int slev, const int elev, const int nproma, const int nlev,
const int nblks_e, const int nblks_v, const bool lacc);
void edges2verts_scalar_lib_sp(const float *p_edge_in, const int *vert_edge_idx,
const int *vert_edge_blk, const float *v_int,
float *p_vert_out, const int i_startblk,
const int i_endblk, const int i_startidx_in,
const int i_endidx_in, const int slev,
const int elev, const int nproma, const int nlev,
const int nblks_e, const int nblks_v,
const bool lacc);
void edges2cells_scalar_lib_dp(const double *p_edge_in, const int *edge_idx,
const int *edge_blk, const double *coeff_int,
double *p_cell_out, const int i_startblk,
const int i_endblk, const int i_startidx_in,
const int i_endidx_in, const int slev,
const int elev, const int nproma, const int nlev,
const int nblks_e, const int nblks_c,
const bool lacc);
void edges2cells_scalar_lib_sp(const float *p_edge_in, const int *edge_idx,
const int *edge_blk, const float *coeff_int,
float *p_cell_out, const int i_startblk,
const int i_endblk, const int i_startidx_in,
const int i_endidx_in, const int slev,
const int elev, const int nproma, const int nlev,
const int nblks_e, const int nblks_c,
const bool lacc);
/////////////////////////////////////////////
void cells2verts_scalar_lib_dp(
const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk,
const double *coeff_int, double *p_vert_out, const int i_startblk,
const int i_endblk, const int i_startidx_in, const int i_endidx_in,
const int slev, const int elev, const int nproma, const int nlev,
const int nblks_c, const int nblks_v, const bool lacc,
const bool acc_async);
void cells2verts_scalar_lib_dp2sp(
const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk,
const float *coeff_int, float *p_vert_out, const int i_startblk,
const int i_endblk, const int i_startidx_in, const int i_endidx_in,
const int slev, const int elev, const int nproma, const int nlev,
const int nblks_c, const int nblks_v, const bool lacc,
const bool acc_async);
void cells2verts_scalar_lib_sp(const float *p_cell_in, const int *vert_cell_idx,
const int *vert_cell_blk, const float *coeff_int,
float *p_vert_out, const int i_startblk,
const int i_endblk, const int i_startidx_in,
const int i_endidx_in, const int slev,
const int elev, const int nproma, const int nlev,
const int nblks_c, const int nblks_v,
const bool lacc, const bool acc_async);
/////////////////////////////////////////////
void cells2verts_scalar_ri_lib_dp(
const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk,
const double *coeff_int, double *p_vert_out, const int i_startblk,
const int i_endblk, const int i_startidx_in, const int i_endidx_in,
const int slev, const int elev, const int nproma, const int nlev,
const int nblks_c, const int nblks_v, const bool lacc,
const bool acc_async);
void cells2verts_scalar_ri_lib_dp2sp(
const double *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk,
const double *coeff_int, float *p_vert_out, const int i_startblk,
const int i_endblk, const int i_startidx_in, const int i_endidx_in,
const int slev, const int elev, const int nproma, const int nlev,
const int nblks_c, const int nblks_v, const bool lacc,
const bool acc_async);
void cells2verts_scalar_ri_lib_sp(
const float *p_cell_in, const int *vert_cell_idx, const int *vert_cell_blk,
const float *coeff_int, float *p_vert_out, const int i_startblk,
const int i_endblk, const int i_startidx_in, const int i_endidx_in,
const int slev, const int elev, const int nproma, const int nlev,
const int nblks_c, const int nblks_v, const bool lacc,
const bool acc_async);
/////////////////////////////////////////////
void verts2cells_scalar_lib_dp(
const double *p_vert_in, const int *cell_index_idx,
const int *cell_vertex_blk, const double *coeff_int, double *p_cell_out,
const int nblks_c, const int npromz_c, const int slev, const int elev,
const int nproma, const int nlev, const int nblks_v, const bool lacc);
void verts2cells_scalar_lib_sp(
const float *p_vert_in, const int *cell_index_idx,
const int *cell_vertex_blk, const float *coeff_int, float *p_cell_out,
const int nblks_c, const int npromz_c, const int slev, const int elev,
const int nproma, const int nlev, const int nblks_v, const bool lacc);
/////////////////////////////////////////////
void cell_avg_lib_dp(const double *psi_c, const int *cell_neighbor_idx,
const int *cell_neighbor_blk, const double *avg_coeff,
double *avg_psi_c, const int i_startblk,
const int i_endblk, const int i_startidx_in,
const int i_endidx_in, const int slev, const int elev,
const int nproma, const int nlev, const int nblks_c,
const bool lacc);
void cell_avg_lib_sp(const float *psi_c, const int *cell_neighbor_idx,
const int *cell_neighbor_blk, const float *avg_coeff,
float *avg_psi_c, const int i_startblk, const int i_endblk,
const int i_startidx_in, const int i_endidx_in,
const int slev, const int elev, const int nproma,
const int nlev, const int nblks_c, const bool lacc);
void rbf_vec_interpol_vertex_lib_dp(
const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
const double *rbf_vec_coeff_v, double *p_u_out, double *p_v_out,
const int i_startblk, const int i_endblk, const int i_startidx_in,
const int i_endidx_in, const int slev, const int elev, const int nproma,
const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
const int nblks_v);
void rbf_vec_interpol_vertex_lib_sp(
const float *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
const float *rbf_vec_coeff_v, float *p_u_out, float *p_v_out,
const int i_startblk, const int i_endblk, const int i_startidx_in,
const int i_endidx_in, const int slev, const int elev, const int nproma,
const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
const int nblks_v);
void rbf_vec_interpol_vertex_lib_dpsp(
const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
const double *rbf_vec_coeff_v, float *p_u_out, float *p_v_out,
const int i_startblk, const int i_endblk, const int i_startidx_in,
const int i_endidx_in, const int slev, const int elev, const int nproma,
const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
const int nblks_v);
void rbf_interpol_c2grad_lib_sp(
const float *p_cell_in, const int *rbf_c2grad_idx,
const int *rbf_c2grad_blk, const float *rbf_c2grad_coeff, float *grad_x,
const float *grad_y, int i_startblk, int i_endblk, int i_startidx_in,
int i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim,
int nlev, int nblk_c, bool lacc);
void rbf_interpol_c2grad_lib_dp(
const double *p_cell_in, const int *rbf_c2grad_idx,
const int *rbf_c2grad_blk, const double *rbf_c2grad_coeff, double *grad_x,
const double *grad_y, int i_startblk, int i_endblk, int i_startidx_in,
int i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim,
int nlev, int nblk_c, bool lacc);
void rbf_vec_interpol_cell_lib_sp(
const float *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c,
const float *rbf_vec_coeff_c, float *p_u_out, float *p_v_out,
int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev,
int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c,
bool lacc, bool acc_async);
void rbf_vec_interpol_cell_lib_dp(
const double *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c,
const double *rbf_vec_coeff_c, double *p_u_out, double *p_v_out,
int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev,
int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c,
bool lacc, bool acc_async);
void rbf_vec_interpol_edge_lib_dp(
const double *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e,
const double *rbf_vec_coeff_e, double *p_vt_out, int i_startblk,
int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev,
int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
bool acc_async);
void rbf_vec_interpol_edge_lib_sp(
const float *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e,
const float *rbf_vec_coeff_e, float *p_vt_out, int i_startblk, int i_endblk,
int i_startidx_in, int i_endidx_in, int slev, int elev, int nlev,
int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, bool acc_async);
}
This diff is collapsed.
// ICON
//
// ---------------------------------------------------------------
// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
// Contact information: icon-model.org
//
// See AUTHORS.TXT for a list of authors
// See LICENSES/ for license information
// SPDX-License-Identifier: BSD-3-Clause
// ---------------------------------------------------------------
#pragma once
template <typename T>
void verts2edges_scalar_lib(const T *p_vertex_in, const int *edge_vertex_idx,
const int *edge_vertex_blk, const T *coeff_int,
T *p_edge_out, const int i_startblk,
const int i_endblk, const int i_startidx_in,
const int i_endidx_in, const int slev,
const int elev, const int nproma, const int nlev,
const int nblks_v, const int nblks_e,
const bool lacc);
;
template <typename T, typename S>
void cells2edges_scalar_lib(const T *p_cell_in, const int *edge_cell_idx,
const int *edge_cell_blk, const S *coeff_int,
S *p_edge_out, const int *i_startblk_in,
const int *i_endblk_in, const int *i_startidx_in,
const int *i_endidx_in, const int slev,
const int elev, const int nproma, const int nlev,
const int nblk_c, const int nblks_e,
const int patch_id, const bool l_limited_area,
const bool lfill_latbc, const bool lacc);
template <typename T>
void edges2verts_scalar_lib(const T *p_edge_in, const int *vert_edge_idx,
const int *vert_edge_blk, const T *v_int,
T *p_vert_out, const int i_startblk,
const int i_endblk, const int i_startidx_in,
const int i_endidx_in, const int slev,
const int elev, const int nproma, const int nlev,
const int nblks_e, const int nblks_v,
const bool lacc);
template <typename T>
void edges2cells_scalar_lib(const T *p_edge_in, const int *edge_idx,
const int *edge_blk, const T *coeff_int,
T *p_cell_out, const int i_startblk,
const int i_endblk, const int i_startidx_in,
const int i_endidx_in, const int slev,
const int elev, const int nproma, const int nlev,
const int nblks_e, const int nblks_c,
const bool lacc);
template <typename T, typename S>
void cells2verts_scalar_lib(const T *p_cell_in, const int *vert_cell_idx,
const int *vert_cell_blk, const S *coeff_int,
S *p_vert_out, const int i_startblk,
const int i_endblk, const int i_startidx_in,
const int i_endidx_in, const int slev,
const int elev, const int nproma, const int nlev,
const int nblks_c, const int nblks_v,
const bool lacc, const bool acc_async);
template <typename T, typename S>
void cells2verts_scalar_ri_lib(const T *p_cell_in, const int *vert_cell_idx,
const int *vert_cell_blk, const T *coeff_int,
S *p_vert_out, const int i_startblk,
const int i_endblk, const int i_startidx_in,
const int i_endidx_in, const int slev,
const int elev, const int nproma, const int nlev,
const int nblks_c, const int nblks_v,
const bool lacc, const bool acc_async);
template <typename T>
void verts2cells_scalar_lib(const T *p_vert_in, const int *cell_index_idx,
const int *cell_vertex_blk, const T *coeff_int,
T *p_cell_out, const int nblks_c,
const int npromz_c, const int slev, const int elev,
const int nproma, const int nlev, const int nblks_v,
const bool lacc);
template <typename T>
void cell_avg_lib(const T *psi_c, const int *cell_neighbor_idx,
const int *cell_neighbor_blk, const T *avg_coeff,
T *avg_psi_c, const int i_startblk, const int i_endblk,
const int i_startidx_in, const int i_endidx_in,
const int slev, const int elev, const int nproma,
const int nlev, const int nblks_c, const bool lacc);
// ICON
//
// ---------------------------------------------------------------
// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
// Contact information: icon-model.org
//
// See AUTHORS.TXT for a list of authors
// See LICENSES/ for license information
// SPDX-License-Identifier: BSD-3-Clause
// ---------------------------------------------------------------
#include "mo_lib_interpolation_vector.hpp"
template <typename T>
void edges2cells_vector_lib(const T *p_vn_in, const T *p_vt_in,
const int *cell_edge_idx, const int *cell_edge_blk,
const T *e_bln_c_u, const T *e_bln_c_v, T *p_u_out,
T *p_v_out,
// Additional integer parameters.
int i_startblk, int i_endblk, int i_startidx_in,
int i_endidx_in, int slev, int elev, int nproma,
// Dimensions for the arrays.
int nlev, int nblks_e, int nblks_c) {
// Wrap raw pointers in unmanaged Kokkos Views.
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedConstT3D;
typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedT3D;
typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
Kokkos::MemoryUnmanaged>
UnmanagedConstInt3D;
UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e);
UnmanagedConstT3D p_vt_in_view(p_vt_in, nproma, nlev, nblks_e);
UnmanagedConstInt3D cell_edge_idx_view(cell_edge_idx, nproma, nblks_c, 3);
UnmanagedConstInt3D cell_edge_blk_view(cell_edge_blk, nproma, nblks_c, 3);
UnmanagedConstT3D e_bln_c_u_view(e_bln_c_u, nproma, 6, nblks_c);
UnmanagedConstT3D e_bln_c_v_view(e_bln_c_v, nproma, 6, nblks_c);
UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblks_c);
UnmanagedT3D p_v_out_view(p_v_out, nproma, nlev, nblks_c);
// Loop over cell blocks as in the original Fortran code.
for (int jb = i_startblk; jb <= i_endblk; ++jb) {
// Call get_indices_c_lib to get inner loop indices for block jb.
int i_startidx, i_endidx;
get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
i_endblk, i_startidx, i_endidx);
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
{slev, i_startidx}, {elev + 1, i_endidx + 1});
Kokkos::parallel_for(
"edges2cells_inner", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
// Compute the bilinear interpolation for cell (jc, jk, jb).
p_u_out_view(jc, jk, jb) =
e_bln_c_u_view(jc, 0, jb) *
p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk,
cell_edge_blk_view(jc, jb, 0) - 1) +
e_bln_c_u_view(jc, 1, jb) *
p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk,
cell_edge_blk_view(jc, jb, 0) - 1) +
e_bln_c_u_view(jc, 2, jb) *
p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk,
cell_edge_blk_view(jc, jb, 1) - 1) +
e_bln_c_u_view(jc, 3, jb) *
p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk,
cell_edge_blk_view(jc, jb, 1) - 1) +
e_bln_c_u_view(jc, 4, jb) *
p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk,
cell_edge_blk_view(jc, jb, 2) - 1) +
e_bln_c_u_view(jc, 5, jb) *
p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk,
cell_edge_blk_view(jc, jb, 2) - 1);
p_v_out_view(jc, jk, jb) =
e_bln_c_v_view(jc, 0, jb) *
p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk,
cell_edge_blk_view(jc, jb, 0) - 1) +
e_bln_c_v_view(jc, 1, jb) *
p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk,
cell_edge_blk_view(jc, jb, 0) - 1) +
e_bln_c_v_view(jc, 2, jb) *
p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk,
cell_edge_blk_view(jc, jb, 1) - 1) +
e_bln_c_v_view(jc, 3, jb) *
p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk,
cell_edge_blk_view(jc, jb, 1) - 1) +
e_bln_c_v_view(jc, 4, jb) *
p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk,
cell_edge_blk_view(jc, jb, 2) - 1) +
e_bln_c_v_view(jc, 5, jb) *
p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk,
cell_edge_blk_view(jc, jb, 2) - 1);
});
// Optionally fence after each block if required.
Kokkos::fence();
}
}
template void edges2cells_vector_lib<double>(
const double *p_vn_in, const double *p_vt_in, const int *cell_edge_idx,
const int *cell_edge_blk, const double *e_bln_c_u, const double *e_bln_c_v,
double *p_u_out, double *p_v_out,
// Additional integer parameters.
int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev,
int elev, int nproma,
// Dimensions for the arrays.
int nlev, int nblks_e, int nblks_c);
template void edges2cells_vector_lib<float>(
const float *p_vn_in, const float *p_vt_in, const int *cell_edge_idx,
const int *cell_edge_blk, const float *e_bln_c_u, const float *e_bln_c_v,
float *p_u_out, float *p_v_out,
// Additional integer parameters.
int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev,
int elev, int nproma,
// Dimensions for the arrays.
int nlev, int nblks_e, int nblks_c);
\ No newline at end of file
// ICON
//
// ---------------------------------------------------------------
// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
// Contact information: icon-model.org
//
// See AUTHORS.TXT for a list of authors
// See LICENSES/ for license information
// SPDX-License-Identifier: BSD-3-Clause
// ---------------------------------------------------------------
#pragma once
#include "mo_lib_loopindices.hpp"
#include <Kokkos_Core.hpp>
#include <vector>
// The templated C++ function using Kokkos.
// Raw pointer arguments are wrapped into unmanaged Kokkos::Views.
// Note: The dimensions below must match the Fortran arrays.
// - p_vn_in and p_vt_in: dimensions [nproma, nlev, nblks_e]
// - cell_edge_idx and cell_edge_blk: dimensions [nproma, nblks_c, 3]
// - e_bln_c_u and e_bln_c_v: dimensions [nproma, 6, nblks_c]
// - p_u_out and p_v_out: dimensions [nproma, nlev, nblks_c]
template <typename T>
void edges2cells_vector_lib(const T *p_vn_in, const T *p_vt_in,
const int *cell_edge_idx, const int *cell_edge_blk,
const T *e_bln_c_u, const T *e_bln_c_v, T *p_u_out,
T *p_v_out,
// Additional integer parameters.
int i_startblk, int i_endblk, int i_startidx_in,
int i_endidx_in, int slev, int elev, int nproma,
// Dimensions for the arrays.
int nlev, int nblks_e, int nblks_c);
\ No newline at end of file
This diff is collapsed.
// ICON
//
// ---------------------------------------------------------------
// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
// Contact information: icon-model.org
//
// See AUTHORS.TXT for a list of authors
// See LICENSES/ for license information
// SPDX-License-Identifier: BSD-3-Clause
// ---------------------------------------------------------------
#pragma once
#include "mo_lib_loopindices.hpp"
#include <Kokkos_Core.hpp>
#include <vector>
template <typename T, typename S>
void rbf_vec_interpol_vertex_lib(
const T *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
const T *rbf_vec_coeff_v, S *p_u_out, S *p_v_out, const int i_startblk,
const int i_endblk, const int i_startidx_in, const int i_endidx_in,
const int slev, const int elev, const int nproma, const bool lacc,
const bool acc_async, const int nlev, const int nblks_e, const int nblks_c);
template <typename T>
void rbf_interpol_c2grad_lib(const T *p_cell_in, const int *rbf_c2grad_idx,
const int *rbf_c2grad_blk,
const T *rbf_c2grad_coeff, T *grad_x, T *grad_y,
int i_startblk, int i_endblk, int i_startidx_in,
int i_endidx_in, int slev, int elev, int nproma,
int rbf_c2grad_dim, int nlev, int nblks_c,
bool lacc);
template <typename T>
void rbf_vec_interpol_cell_lib(const T *p_vn_in, const int *rbf_vec_idx_c,
const int *rbf_vec_blk_c,
const T *rbf_vec_coeff_c, T *p_u_out, T *p_v_out,
int i_startblk, int i_endblk, int i_startidx_in,
int i_endidx_in, int slev, int elev, int nproma,
int nlev, int nblks_c, int nblks_e,
int rbf_vec_dim_c, bool lacc, bool acc_async);
template <typename T>
void rbf_vec_interpol_edge_lib(const T *p_vn_in, const int *rbf_vec_idx_e,
const int *rbf_vec_blk_e,
const T *rbf_vec_coeff_e, T *p_vt_out,
int i_startblk, int i_endblk, int i_startidx_in,
int i_endidx_in, int slev, int elev, int nlev,
int nproma, int rbf_vec_dim_e, int nblks_e,
bool lacc, bool acc_async);
......@@ -13,11 +13,14 @@ add_library(
iconmath-support
mo_gridman_constants.f90
mo_lib_grid_geometry_info.f90
mo_lib_loopindices.f90
mo_lib_loopindices.cpp
mo_lib_loopindices.F90
mo_math_constants.f90
mo_math_types.f90
mo_math_utilities.cpp
mo_math_utilities.F90
mo_random_number_generators.F90)
mo_random_number_generators.F90
support_bindings.cpp)
add_library(${PROJECT_NAME}::support ALIAS iconmath-support)
......@@ -40,11 +43,17 @@ if(IM_ENABLE_DIM_SWAP)
target_compile_definitions(iconmath-support PRIVATE __SWAPDIM)
endif()
if(IM_USE_CPP_BINDINGS)
target_compile_definitions(iconmath-support PUBLIC __USE_CPP_BINDINGS)
endif()
if(IM_ENABLE_OPENACC)
# If _OPENACC is defined, assume that the required compiler flags are already
# provided, e.g. in CMAKE_Fortran_FLAGS:
if(NOT HAS_OPENACC_MACRO)
target_compile_options(iconmath-support PRIVATE ${OpenACC_Fortran_OPTIONS})
target_compile_options(iconmath-support
PRIVATE
$<$<COMPILE_LANGUAGE:Fortran>:${OpenACC_Fortran_OPTIONS}>)
# This make sures that unit tests (FortUTF) compiles without the need of
# passing OpenACC compile option.
target_link_libraries(iconmath-support PRIVATE OpenACC::OpenACC_Fortran)
......@@ -57,9 +66,24 @@ target_include_directories(
# Path to the Fortran modules:
$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:${Fortran_MODULE_DIRECTORY}>>
$<INSTALL_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:$<INSTALL_PREFIX>/${CMAKE_INSTALL_INCLUDEDIR}>>
)
INTERFACE
# Path to the internal C/C++ headers (for testing): Requires CMake 3.15+ for
# multiple compile languages
# https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html
$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_SOURCE_DIR}>>
PRIVATE
# Path to config.h (for C and C++ only): Requires CMake 3.15+ for multiple
# compile languages
# https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html
$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_BINARY_DIR}>>)
target_link_libraries(iconmath-support PUBLIC fortran-support::fortran-support)
target_link_libraries(iconmath-support
PUBLIC
fortran-support::fortran-support
PRIVATE
Kokkos::kokkos
)
set_target_properties(iconmath-support PROPERTIES LINKER_LANGUAGE Fortran)
install(TARGETS iconmath-support EXPORT "${PROJECT_NAME}-targets")
......
......@@ -16,12 +16,18 @@
MODULE mo_lib_loopindices
#ifdef __USE_CPP_BINDINGS
USE, INTRINSIC :: ISO_C_BINDING
#endif
IMPLICIT NONE
PRIVATE
PUBLIC :: get_indices_c_lib, get_indices_e_lib, get_indices_v_lib
#ifndef __USE_CPP_BINDINGS
CONTAINS
!-------------------------------------------------------------------------
......@@ -121,5 +127,31 @@ CONTAINS
END SUBROUTINE get_indices_v_lib
END MODULE mo_lib_loopindices
#else
INTERFACE
SUBROUTINE get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, &
i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_c_lib")
IMPORT :: c_int
INTEGER(c_int), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk
INTEGER(c_int) :: i_startidx_out, i_endidx_out
END SUBROUTINE get_indices_c_lib
SUBROUTINE get_indices_e_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, &
i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_e_lib")
IMPORT :: c_int
INTEGER(c_int), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk
INTEGER(c_int) :: i_startidx_out, i_endidx_out
END SUBROUTINE get_indices_e_lib
SUBROUTINE get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, &
i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_v_lib")
IMPORT :: c_int
INTEGER(c_int), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk
INTEGER(c_int) :: i_startidx_out, i_endidx_out
END SUBROUTINE get_indices_v_lib
END INTERFACE
#endif
END MODULE mo_lib_loopindices
// ICON
//
// ---------------------------------------------------------------
// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
// Contact information: icon-model.org
//
// See AUTHORS.TXT for a list of authors
// See LICENSES/ for license information
// SPDX-License-Identifier: BSD-3-Clause
// ---------------------------------------------------------------
#include <algorithm> // For std::max
// get_indices_c_lib function
void get_indices_c_lib(const int i_startidx_in, const int i_endidx_in, const int nproma,
const int i_blk, const int i_startblk, const int i_endblk,
int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true) {
//Since code is ported incrementally from Fortran to C++, depending on where the function is called from
//(either fortran or c++), the first index should be either 0 or 1.
int first_index;
if (called_from_cpp)
first_index = 0;
else
first_index = 1;
if (i_blk == i_startblk) {
i_startidx_out = std::max(first_index, i_startidx_in);
i_endidx_out = nproma;
if (i_blk == i_endblk) {
i_endidx_out = i_endidx_in;
}
} else if (i_blk == i_endblk) {
i_startidx_out = first_index;
i_endidx_out = i_endidx_in;
} else {
i_startidx_out = first_index;
i_endidx_out = nproma;
}
}
// get_indices_e_lib function
void get_indices_e_lib(const int i_startidx_in, const int i_endidx_in, const int nproma,
const int i_blk, const int i_startblk, const int i_endblk,
int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true) {
//Since code is ported incrementally from Fortran to C++, depending on where the function is called from,
//the first index should be either 0 or 1.
int first_index;
if (called_from_cpp)
first_index = 0;
else
first_index = 1;
i_startidx_out = (i_blk != i_startblk) ? first_index : std::max(first_index, i_startidx_in);
i_endidx_out = (i_blk != i_endblk) ? nproma : i_endidx_in;
}
// get_indices_v_lib function
void get_indices_v_lib(const int i_startidx_in, const int i_endidx_in, const int nproma,
const int i_blk, const int i_startblk, const int i_endblk,
int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true) {
//Since code is ported incrementally from Fortran to C++, depending on where the function is called from,
//the first index should be either 0 or 1.
int first_index;
if (called_from_cpp)
first_index = 0;
else
first_index = 1;
if (i_blk == i_startblk) {
i_startidx_out = i_startidx_in;
i_endidx_out = nproma;
if (i_blk == i_endblk) {
i_endidx_out = i_endidx_in;
}
} else if (i_blk == i_endblk) {
i_startidx_out = first_index;
i_endidx_out = i_endidx_in;
} else {
i_startidx_out = first_index;
i_endidx_out = nproma;
}
}
\ No newline at end of file
// ICON
//
// ---------------------------------------------------------------
// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
// Contact information: icon-model.org
//
// See AUTHORS.TXT for a list of authors
// See LICENSES/ for license information
// SPDX-License-Identifier: BSD-3-Clause
// ---------------------------------------------------------------
#pragma once
void get_indices_c_lib(const int i_startidx_in, const int i_endidx_in, const int nproma,
const int i_blk, const int i_startblk, const int i_endblk,
int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true);
void get_indices_e_lib(const int i_startidx_in, const int i_endidx_in, const int nproma,
const int i_blk, const int i_startblk, const int i_endblk,
int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true);
void get_indices_v_lib(const int i_startidx_in, const int i_endidx_in, const int nproma,
const int i_blk, const int i_startblk, const int i_endblk,
int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true);
\ No newline at end of file
......@@ -22,6 +22,7 @@
! #endif
MODULE mo_math_utilities
USE, INTRINSIC :: ISO_C_BINDING
USE mo_iconlib_kind, ONLY: wp, dp, sp
USE mo_math_constants, ONLY: pi, pi_2, dbl_eps
USE mo_gridman_constants, ONLY: SUCCESS, TORUS_MAX_LAT
......@@ -78,7 +79,11 @@ MODULE mo_math_utilities
PUBLIC :: line_intersect
PUBLIC :: lintersect
PUBLIC :: tdma_solver
#ifndef __USE_CPP_BINDINGS
PUBLIC :: tdma_solver_vec
#else
PUBLIC :: tdma_solver_vec_dp
#endif
PUBLIC :: check_orientation
! vertical coordinates routines
......@@ -126,6 +131,11 @@ MODULE mo_math_utilities
MODULE PROCEDURE check_orientation_sp
END INTERFACE
INTERFACE rotate_latlon
MODULE PROCEDURE rotate_latlon_dp
MODULE PROCEDURE rotate_latlon_sp
END INTERFACE
!> Derived type specifying a set of REAL(dp) values.
!
TYPE t_value_set
......@@ -160,7 +170,99 @@ MODULE mo_math_utilities
CHARACTER(LEN=*), PARAMETER :: modname = 'mo_math_utilities'
!-------------------------------------------------------------------------
!>
!! TDMA tridiagonal matrix solver for a_i*x_(i-1) + b_i*x_i + c_i*x_(i+1) = d_i
!!
!! a - sub-diagonal (means it is the diagonal below the main diagonal)
!! b - the main diagonal
!! c - sup-diagonal (means it is the diagonal above the main diagonal)
!! d - right part
!! varout - the answer (identical to x in description above)
!! slev - start level (top)
!! elev - end level (bottom)
! Preprocessor directive to conditionally include the tdma_solver_vec implementation
#ifndef __USE_CPP_BINDINGS
CONTAINS
SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, varout, opt_acc_queue)
INTEGER, INTENT(IN) :: slev, elev
INTEGER, INTENT(IN) :: startidx, endidx
REAL(wp), INTENT(IN) :: a(:, :), b(:, :), c(:, :), d(:, :)
REAL(wp), INTENT(OUT) :: varout(:, :)
INTEGER, OPTIONAL, INTENT(IN) :: opt_acc_queue
!
! local
REAL(wp):: m, c_p(SIZE(a, 1), SIZE(a, 2)), d_p(SIZE(a, 1), SIZE(a, 2))
INTEGER :: i
INTEGER :: jc
INTEGER :: acc_queue
IF (PRESENT(opt_acc_queue)) THEN
acc_queue = opt_acc_queue
ELSE
acc_queue = 1
END IF
! initialize c-prime and d-prime
!$ACC PARALLEL DEFAULT(PRESENT) CREATE(c_p, d_p) ASYNC(acc_queue)
!$ACC LOOP GANG(STATIC: 1) VECTOR
DO jc = startidx, endidx
c_p(jc, slev) = c(jc, slev)/b(jc, slev)
d_p(jc, slev) = d(jc, slev)/b(jc, slev)
END DO
! solve for vectors c-prime and d-prime
!$ACC LOOP SEQ
!NEC$ outerloop_unroll(4)
DO i = slev + 1, elev
!$ACC LOOP GANG(STATIC: 1) VECTOR PRIVATE(m)
DO jc = startidx, endidx
m = 1._wp/(b(jc, i) - c_p(jc, i - 1)*a(jc, i))
c_p(jc, i) = c(jc, i)*m
d_p(jc, i) = (d(jc, i) - d_p(jc, i - 1)*a(jc, i))*m
END DO
END DO
! initialize varout
!$ACC LOOP GANG(STATIC: 1) VECTOR
DO jc = startidx, endidx
varout(jc, elev) = d_p(jc, elev)
END DO
! solve for varout from the vectors c-prime and d-prime
!$ACC LOOP SEQ
!NEC$ outerloop_unroll(4)
DO i = elev - 1, slev, -1
!$ACC LOOP GANG(STATIC: 1) VECTOR
DO jc = startidx, endidx
varout(jc, i) = d_p(jc, i) - c_p(jc, i)*varout(jc, i + 1)
END DO
END DO
!$ACC END PARALLEL
IF (.NOT. PRESENT(opt_acc_queue)) THEN
!$ACC WAIT(acc_queue)
END IF
END SUBROUTINE tdma_solver_vec
#else
! C++ binding for tdma_solver_vec
INTERFACE
SUBROUTINE tdma_solver_vec_dp(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) &
BIND(C, NAME="tdma_solver_vec_dp")
IMPORT :: c_double, c_int
REAL(c_double), INTENT(IN) :: a(*), b(*), c(*), d(*)
INTEGER(c_int), VALUE :: slev, elev, startidx, endidx, nrows, ncols
REAL(c_double), INTENT(OUT) :: varout(*)
INTEGER(c_int), OPTIONAL :: opt_acc_queue
END SUBROUTINE tdma_solver_vec_dp
END INTERFACE
CONTAINS
#endif
!-------------------------------------------------------------------------
! Variant for double-precision (or working-precision=dp) lon+lat in ICON
......@@ -1674,12 +1776,12 @@ CONTAINS
!! See the routine "mo_lonlat_grid::rotate_latlon_grid" for a
!! detailed description of the transformation process.
!!
SUBROUTINE rotate_latlon(lat, lon, pollat, pollon)
SUBROUTINE rotate_latlon_dp(lat, lon, pollat, pollon)
REAL(wp), INTENT(inout) :: lat, lon
REAL(wp), INTENT(in) :: pollat, pollon
REAL(dp), INTENT(inout) :: lat, lon
REAL(dp), INTENT(in) :: pollat, pollon
REAL(wp) :: rotlat, rotlon
REAL(dp) :: rotlat, rotlon
!-----------------------------------------------------------------------
rotlat = ASIN(SIN(lat)*SIN(pollat) + COS(lat)*COS(pollat)*COS(lon - pollon))
......@@ -1689,7 +1791,24 @@ CONTAINS
lat = rotlat
lon = rotlon
END SUBROUTINE rotate_latlon
END SUBROUTINE rotate_latlon_dp
SUBROUTINE rotate_latlon_sp(lat, lon, pollat, pollon)
REAL(sp), INTENT(inout) :: lat, lon
REAL(sp), INTENT(in) :: pollat, pollon
REAL(sp) :: rotlat, rotlon
!-----------------------------------------------------------------------
rotlat = ASIN(SIN(lat)*SIN(pollat) + COS(lat)*COS(pollat)*COS(lon - pollon))
rotlon = ATAN2(COS(lat)*SIN(lon - pollon), &
& (COS(lat)*SIN(pollat)*COS(lon - pollon) - SIN(lat)*COS(pollat)))
lat = rotlat
lon = rotlon
END SUBROUTINE rotate_latlon_sp
!-------------------------------------------------------------------------
!-----------------------------------------------------------------------
......@@ -2019,96 +2138,23 @@ CONTAINS
END SUBROUTINE tdma_solver
!-------------------------------------------------------------------------
!>
!! TDMA tridiagonal matrix solver for a_i*x_(i-1) + b_i*x_i + c_i*x_(i+1) = d_i
!!
!! a - sub-diagonal (means it is the diagonal below the main diagonal)
!! b - the main diagonal
!! c - sup-diagonal (means it is the diagonal above the main diagonal)
!! d - right part
!! varout - the answer (identical to x in description above)
!! slev - start level (top)
!! elev - end level (bottom)
SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, varout, opt_acc_queue)
INTEGER, INTENT(IN) :: slev, elev
INTEGER, INTENT(IN) :: startidx, endidx
REAL(wp), INTENT(IN) :: a(:, :), b(:, :), c(:, :), d(:, :)
REAL(wp), INTENT(OUT) :: varout(:, :)
INTEGER, OPTIONAL, INTENT(IN) :: opt_acc_queue
!
! local
REAL(wp):: m, c_p(SIZE(a, 1), SIZE(a, 2)), d_p(SIZE(a, 1), SIZE(a, 2))
INTEGER :: i
INTEGER :: jc
INTEGER :: acc_queue
IF (PRESENT(opt_acc_queue)) THEN
acc_queue = opt_acc_queue
ELSE
acc_queue = 1
END IF
! initialize c-prime and d-prime
!$ACC PARALLEL DEFAULT(PRESENT) CREATE(c_p, d_p) ASYNC(acc_queue)
!$ACC LOOP GANG(STATIC: 1) VECTOR
DO jc = startidx, endidx
c_p(jc, slev) = c(jc, slev)/b(jc, slev)
d_p(jc, slev) = d(jc, slev)/b(jc, slev)
END DO
! solve for vectors c-prime and d-prime
!$ACC LOOP SEQ
!NEC$ outerloop_unroll(4)
DO i = slev + 1, elev
!$ACC LOOP GANG(STATIC: 1) VECTOR PRIVATE(m)
DO jc = startidx, endidx
m = 1._wp/(b(jc, i) - c_p(jc, i - 1)*a(jc, i))
c_p(jc, i) = c(jc, i)*m
d_p(jc, i) = (d(jc, i) - d_p(jc, i - 1)*a(jc, i))*m
END DO
END DO
! initialize varout
!$ACC LOOP GANG(STATIC: 1) VECTOR
DO jc = startidx, endidx
varout(jc, elev) = d_p(jc, elev)
END DO
! solve for varout from the vectors c-prime and d-prime
!$ACC LOOP SEQ
!NEC$ outerloop_unroll(4)
DO i = elev - 1, slev, -1
!$ACC LOOP GANG(STATIC: 1) VECTOR
DO jc = startidx, endidx
varout(jc, i) = d_p(jc, i) - c_p(jc, i)*varout(jc, i + 1)
END DO
END DO
!$ACC END PARALLEL
IF (.NOT. PRESENT(opt_acc_queue)) THEN
!$ACC WAIT(acc_queue)
END IF
END SUBROUTINE tdma_solver_vec
!-------------------------------------------------------------------------
!-------------------------------------------------------------------------
!
!> Helper functions for computing the vertical layer structure
!> zlev double-precision in ICON
SUBROUTINE set_zlev(zlev_i, zlev_m, n_zlev, dzlev_m)
INTEGER, INTENT(IN) :: n_zlev
REAL(dp), INTENT(OUT) :: zlev_i(n_zlev + 1), zlev_m(n_zlev)
REAL(dp), INTENT(IN) :: dzlev_m(:) ! namelist input of layer thickness
REAL(wp), INTENT(OUT) :: zlev_i(n_zlev + 1), zlev_m(n_zlev)
REAL(wp), INTENT(IN) :: dzlev_m(:) ! namelist input of layer thickness
INTEGER :: jk
REAL(dp) :: accum
REAL(wp) :: accum
accum = 0.0_dp
accum = 0.0_wp
! zlev_i : upper border surface of vertical cells
! zlev_m : position of coordinate surfaces in meters below zero surface.
DO jk = 1, n_zlev
zlev_i(jk) = accum
zlev_m(jk) = 0.5_dp*(2.0_dp*accum + dzlev_m(jk))
zlev_m(jk) = 0.5_wp*(2.0_wp*accum + dzlev_m(jk))
accum = accum + dzlev_m(jk)
END DO
zlev_i(n_zlev + 1) = accum
......
// ICON
//
// ---------------------------------------------------------------
// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
// Contact information: icon-model.org
//
// See AUTHORS.TXT for a list of authors
// See LICENSES/ for license information
// SPDX-License-Identifier: BSD-3-Clause
// ---------------------------------------------------------------
#include "mo_math_utilities.hpp"
template <typename T>
void tdma_solver_vec(const T* a, const T* b, const T* c, const T* d,
int slev, int elev, int startidx, int endidx,
int nrows, int ncols, T* varout) {
// Start timing
auto start_time = std::chrono::high_resolution_clock::now();
// Allocate temporary arrays using Kokkos::View.
// The views c_p and d_p are allocated as 2D arrays with dimensions [nrows][ncols].
// Kokkos::View automatically handles memory management.
Kokkos::View<T**> c_p("c_p", nrows, ncols);
Kokkos::View<T**> d_p("d_p", nrows, ncols);
// Wrap the input arrays in unmanaged views.
// We assume that the input arrays are laid out in column-major order as in the original code.
// Here we use LayoutLeft so that the first index (row) is contiguous.
typedef Kokkos::View<const T**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConst2D;
typedef Kokkos::View<T**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> Unmanaged2D;
UnmanagedConst2D a_view(a, nrows, ncols);
UnmanagedConst2D b_view(b, nrows, ncols);
UnmanagedConst2D c_view(c, nrows, ncols);
UnmanagedConst2D d_view(d, nrows, ncols);
Unmanaged2D varout_view(varout, nrows, ncols);
// Initialize c-prime and d-prime at the starting level (slev)
Kokkos::parallel_for("init_c_p_d_p", Kokkos::RangePolicy<>(startidx, endidx), KOKKOS_LAMBDA (const int jc) {
c_p(jc, slev) = c_view(jc, slev) / b_view(jc, slev);
d_p(jc, slev) = d_view(jc, slev) / b_view(jc, slev);
});
Kokkos::fence();
// Forward sweep: compute c-prime and d-prime for each column from slev+1 to elev-1.
for (int i = slev + 1; i < elev; ++i) {
Kokkos::parallel_for("forward_sweep", Kokkos::RangePolicy<>(startidx, endidx), KOKKOS_LAMBDA (const int jc) {
T m = 1.0 / (b_view(jc, i) - c_p(jc, i - 1) * a_view(jc, i));
c_p(jc, i) = c_view(jc, i) * m;
d_p(jc, i) = (d_view(jc, i) - d_p(jc, i - 1) * a_view(jc, i)) * m;
});
Kokkos::fence();
}
// Initialize the output array at the last level (elev-1)
Kokkos::parallel_for("init_varout", Kokkos::RangePolicy<>(startidx, endidx), KOKKOS_LAMBDA (const int jc) {
varout_view(jc, elev-1) = d_p(jc, elev-1);
});
Kokkos::fence();
// Back substitution: update varout for columns from elev-2 down to slev.
for (int i = elev - 2; i >= slev; --i) {
Kokkos::parallel_for("back_substitution", Kokkos::RangePolicy<>(startidx, endidx), KOKKOS_LAMBDA (const int jc) {
varout_view(jc, i) = d_p(jc, i) - c_p(jc, i) * varout_view(jc, i + 1);
});
Kokkos::fence();
}
c_p = Kokkos::View<T**>();
d_p = Kokkos::View<T**>();
// End timing and print the elapsed time
auto end_time = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed_time = end_time - start_time;
std::cout << "Elapsed time for tdma_solver_vec (Kokkos): " << elapsed_time.count() << " seconds" << std::endl;
}
template
void tdma_solver_vec<double>(const double* a, const double* b, const double* c, const double* d,
int slev, int elev, int startidx, int endidx,
int nrows, int ncols, double* varout);
template
void tdma_solver_vec<float>(const float* a, const float* b, const float* c, const float* d,
int slev, int elev, int startidx, int endidx,
int nrows, int ncols, float* varout);