Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • icon-libraries/libiconmath
1 result
Show changes
Commits on Source (6)
......@@ -22,6 +22,5 @@ iconmath_Tests
# Test stage files:
/**/Testing/*
run_tests.f90
iconmath_Tests
run_tests.f90
......@@ -14,7 +14,9 @@ cmake_minimum_required(VERSION 3.18)
project(
iconmath
VERSION 1.0.0
LANGUAGES Fortran)
LANGUAGES Fortran CXX)
set(CMAKE_CXX_STANDARD 17)
option(BUILD_SHARED_LIBS "Build shared libraries" ON)
option(BUILD_TESTING "Build tests" ON)
......@@ -23,6 +25,7 @@ option(BUILD_ICONMATH_HORIZONTAL "Build horizontal library" ON)
option(IM_ENABLE_MIXED_PRECISION "Enable mixed precision" OFF)
option(IM_ENABLE_LOOP_EXCHANGE "Enable loop exchange" OFF)
option(IM_USE_CPP_BINDINGS "Use C++ bindings" OFF)
option(IM_ENABLE_DIM_SWAP "Enable dimension swap" OFF)
option(IM_ENABLE_OPENACC "Enable OpenACC support" OFF)
option(IM_ENABLE_OPENMP "Enable OpenMP support" OFF)
......
......@@ -13,9 +13,11 @@ add_library(
iconmath-support
mo_gridman_constants.f90
mo_lib_grid_geometry_info.f90
mo_lib_loopindices.f90
mo_lib_loopindices.cpp
mo_lib_loopindices.F90
mo_math_constants.f90
mo_math_types.f90
mo_math_utilities.cpp
mo_math_utilities.F90
mo_random_number_generators.F90)
......@@ -40,6 +42,10 @@ if(IM_ENABLE_DIM_SWAP)
target_compile_definitions(iconmath-support PRIVATE __SWAPDIM)
endif()
if(IM_USE_CPP_BINDINGS)
target_compile_definitions(iconmath-support PRIVATE __USE_CPP_BINDINGS)
endif()
if(IM_ENABLE_OPENACC)
# If _OPENACC is defined, assume that the required compiler flags are already
# provided, e.g. in CMAKE_Fortran_FLAGS:
......@@ -57,7 +63,16 @@ target_include_directories(
# Path to the Fortran modules:
$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:${Fortran_MODULE_DIRECTORY}>>
$<INSTALL_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:$<INSTALL_PREFIX>/${CMAKE_INSTALL_INCLUDEDIR}>>
)
INTERFACE
# Path to the internal C/C++ headers (for testing): Requires CMake 3.15+ for
# multiple compile languages
# https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html
$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_SOURCE_DIR}>>
PRIVATE
# Path to config.h (for C and C++ only): Requires CMake 3.15+ for multiple
# compile languages
# https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html
$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_BINARY_DIR}>>)
target_link_libraries(iconmath-support PUBLIC fortran-support::fortran-support)
......
......@@ -16,12 +16,18 @@
MODULE mo_lib_loopindices
#ifdef __USE_CPP_BINDINGS
USE, INTRINSIC :: ISO_C_BINDING
#endif
IMPLICIT NONE
PRIVATE
PUBLIC :: get_indices_c_lib, get_indices_e_lib, get_indices_v_lib
#ifndef __USE_CPP_BINDINGS
CONTAINS
!-------------------------------------------------------------------------
......@@ -121,5 +127,31 @@ CONTAINS
END SUBROUTINE get_indices_v_lib
END MODULE mo_lib_loopindices
#else
INTERFACE
SUBROUTINE get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, &
i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_c_lib")
IMPORT :: C_INT
INTEGER(C_INT), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk
INTEGER(C_INT) :: i_startidx_out, i_endidx_out
END SUBROUTINE get_indices_c_lib
SUBROUTINE get_indices_e_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, &
i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_e_lib")
IMPORT :: C_INT
INTEGER(C_INT), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk
INTEGER(C_INT) :: i_startidx_out, i_endidx_out
END SUBROUTINE get_indices_e_lib
SUBROUTINE get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, &
i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_v_lib")
IMPORT :: C_INT
INTEGER(C_INT), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk
INTEGER(C_INT) :: i_startidx_out, i_endidx_out
END SUBROUTINE get_indices_v_lib
END INTERFACE
#endif
END MODULE mo_lib_loopindices
#include <algorithm> // For std::max
extern "C" {
// get_indices_c_lib function
void get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
int &i_startidx_out, int &i_endidx_out) {
if (i_blk == i_startblk) {
i_startidx_out = std::max(1, i_startidx_in);
i_endidx_out = nproma;
if (i_blk == i_endblk) {
i_endidx_out = i_endidx_in;
}
} else if (i_blk == i_endblk) {
i_startidx_out = 1;
i_endidx_out = i_endidx_in;
} else {
i_startidx_out = 1;
i_endidx_out = nproma;
}
}
// get_indices_e_lib function
void get_indices_e_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
int &i_startidx_out, int &i_endidx_out) {
i_startidx_out = (i_blk != i_startblk) ? 1 : std::max(1, i_startidx_in);
i_endidx_out = (i_blk != i_endblk) ? nproma : i_endidx_in;
}
// get_indices_v_lib function
void get_indices_v_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
int &i_startidx_out, int &i_endidx_out) {
if (i_blk == i_startblk) {
i_startidx_out = i_startidx_in;
i_endidx_out = nproma;
if (i_blk == i_endblk) {
i_endidx_out = i_endidx_in;
}
} else if (i_blk == i_endblk) {
i_startidx_out = 1;
i_endidx_out = i_endidx_in;
} else {
i_startidx_out = 1;
i_endidx_out = nproma;
}
}
}
......@@ -22,6 +22,7 @@
! #endif
MODULE mo_math_utilities
USE, INTRINSIC :: ISO_C_BINDING
USE mo_iconlib_kind, ONLY: wp, dp, sp
USE mo_math_constants, ONLY: pi, pi_2, dbl_eps
USE mo_gridman_constants, ONLY: SUCCESS, TORUS_MAX_LAT
......@@ -160,7 +161,98 @@ MODULE mo_math_utilities
CHARACTER(LEN=*), PARAMETER :: modname = 'mo_math_utilities'
!-------------------------------------------------------------------------
!>
!! TDMA tridiagonal matrix solver for a_i*x_(i-1) + b_i*x_i + c_i*x_(i+1) = d_i
!!
!! a - sub-diagonal (means it is the diagonal below the main diagonal)
!! b - the main diagonal
!! c - sup-diagonal (means it is the diagonal above the main diagonal)
!! d - right part
!! varout - the answer (identical to x in description above)
!! slev - start level (top)
!! elev - end level (bottom)
! Preprocessor directive to conditionally include the tdma_solver_vec implementation
#ifndef __USE_CPP_BINDINGS
CONTAINS
SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, varout, opt_acc_queue)
INTEGER, INTENT(IN) :: slev, elev
INTEGER, INTENT(IN) :: startidx, endidx
REAL(wp), INTENT(IN) :: a(:, :), b(:, :), c(:, :), d(:, :)
REAL(wp), INTENT(OUT) :: varout(:, :)
INTEGER, OPTIONAL, INTENT(IN) :: opt_acc_queue
!
! local
REAL(wp):: m, c_p(SIZE(a, 1), SIZE(a, 2)), d_p(SIZE(a, 1), SIZE(a, 2))
INTEGER :: i
INTEGER :: jc
INTEGER :: acc_queue
IF (PRESENT(opt_acc_queue)) THEN
acc_queue = opt_acc_queue
ELSE
acc_queue = 1
END IF
! initialize c-prime and d-prime
!$ACC PARALLEL DEFAULT(PRESENT) CREATE(c_p, d_p) ASYNC(acc_queue)
!$ACC LOOP GANG(STATIC: 1) VECTOR
DO jc = startidx, endidx
c_p(jc, slev) = c(jc, slev)/b(jc, slev)
d_p(jc, slev) = d(jc, slev)/b(jc, slev)
END DO
! solve for vectors c-prime and d-prime
!$ACC LOOP SEQ
!NEC$ outerloop_unroll(4)
DO i = slev + 1, elev
!$ACC LOOP GANG(STATIC: 1) VECTOR PRIVATE(m)
DO jc = startidx, endidx
m = 1._wp/(b(jc, i) - c_p(jc, i - 1)*a(jc, i))
c_p(jc, i) = c(jc, i)*m
d_p(jc, i) = (d(jc, i) - d_p(jc, i - 1)*a(jc, i))*m
END DO
END DO
! initialize varout
!$ACC LOOP GANG(STATIC: 1) VECTOR
DO jc = startidx, endidx
varout(jc, elev) = d_p(jc, elev)
END DO
! solve for varout from the vectors c-prime and d-prime
!$ACC LOOP SEQ
!NEC$ outerloop_unroll(4)
DO i = elev - 1, slev, -1
!$ACC LOOP GANG(STATIC: 1) VECTOR
DO jc = startidx, endidx
varout(jc, i) = d_p(jc, i) - c_p(jc, i)*varout(jc, i + 1)
END DO
END DO
!$ACC END PARALLEL
IF (.NOT. PRESENT(opt_acc_queue)) THEN
!$ACC WAIT(acc_queue)
END IF
END SUBROUTINE tdma_solver_vec
#else
! C++ binding for tdma_solver_vec
INTERFACE
SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, varout, opt_acc_queue) BIND(C, NAME="tdma_solver_vec")
IMPORT :: C_DOUBLE, C_INT
REAL(C_DOUBLE), INTENT(IN) :: a(*), b(*), c(*), d(*)
INTEGER(C_INT), VALUE :: slev, elev, startidx, endidx
REAL(C_DOUBLE), INTENT(OUT) :: varout(*)
INTEGER(C_INT), OPTIONAL :: opt_acc_queue
END SUBROUTINE tdma_solver_vec
END INTERFACE
CONTAINS
#endif
!-------------------------------------------------------------------------
! Variant for double-precision (or working-precision=dp) lon+lat in ICON
......@@ -2019,78 +2111,6 @@ CONTAINS
END SUBROUTINE tdma_solver
!-------------------------------------------------------------------------
!>
!! TDMA tridiagonal matrix solver for a_i*x_(i-1) + b_i*x_i + c_i*x_(i+1) = d_i
!!
!! a - sub-diagonal (means it is the diagonal below the main diagonal)
!! b - the main diagonal
!! c - sup-diagonal (means it is the diagonal above the main diagonal)
!! d - right part
!! varout - the answer (identical to x in description above)
!! slev - start level (top)
!! elev - end level (bottom)
SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, varout, opt_acc_queue)
INTEGER, INTENT(IN) :: slev, elev
INTEGER, INTENT(IN) :: startidx, endidx
REAL(wp), INTENT(IN) :: a(:, :), b(:, :), c(:, :), d(:, :)
REAL(wp), INTENT(OUT) :: varout(:, :)
INTEGER, OPTIONAL, INTENT(IN) :: opt_acc_queue
!
! local
REAL(wp):: m, c_p(SIZE(a, 1), SIZE(a, 2)), d_p(SIZE(a, 1), SIZE(a, 2))
INTEGER :: i
INTEGER :: jc
INTEGER :: acc_queue
IF (PRESENT(opt_acc_queue)) THEN
acc_queue = opt_acc_queue
ELSE
acc_queue = 1
END IF
! initialize c-prime and d-prime
!$ACC PARALLEL DEFAULT(PRESENT) CREATE(c_p, d_p) ASYNC(acc_queue)
!$ACC LOOP GANG(STATIC: 1) VECTOR
DO jc = startidx, endidx
c_p(jc, slev) = c(jc, slev)/b(jc, slev)
d_p(jc, slev) = d(jc, slev)/b(jc, slev)
END DO
! solve for vectors c-prime and d-prime
!$ACC LOOP SEQ
!NEC$ outerloop_unroll(4)
DO i = slev + 1, elev
!$ACC LOOP GANG(STATIC: 1) VECTOR PRIVATE(m)
DO jc = startidx, endidx
m = 1._wp/(b(jc, i) - c_p(jc, i - 1)*a(jc, i))
c_p(jc, i) = c(jc, i)*m
d_p(jc, i) = (d(jc, i) - d_p(jc, i - 1)*a(jc, i))*m
END DO
END DO
! initialize varout
!$ACC LOOP GANG(STATIC: 1) VECTOR
DO jc = startidx, endidx
varout(jc, elev) = d_p(jc, elev)
END DO
! solve for varout from the vectors c-prime and d-prime
!$ACC LOOP SEQ
!NEC$ outerloop_unroll(4)
DO i = elev - 1, slev, -1
!$ACC LOOP GANG(STATIC: 1) VECTOR
DO jc = startidx, endidx
varout(jc, i) = d_p(jc, i) - c_p(jc, i)*varout(jc, i + 1)
END DO
END DO
!$ACC END PARALLEL
IF (.NOT. PRESENT(opt_acc_queue)) THEN
!$ACC WAIT(acc_queue)
END IF
END SUBROUTINE tdma_solver_vec
!-------------------------------------------------------------------------
!-------------------------------------------------------------------------
!
!> Helper functions for computing the vertical layer structure
......
#include <vector>
#include <iostream>
#include <chrono> // For timing
extern "C" {
void tdma_solver_vec(double *a, double *b, double *c, double *d,
int slev, int elev, int startidx, int endidx,
double* varout, int opt_acc_queue = -1) {
int acc_queue = (opt_acc_queue == -1) ? 1 : opt_acc_queue; // Use 1 as the default if opt_acc_queue is not provided
// Determine array sizes based on startidx and endidx
int nrows = endidx - startidx;
int ncols = elev - slev;
// Temporary arrays for c-prime and d-prime
std::vector<double> cp(nrows * ncols, 0.0);
std::vector<double> dp(nrows * ncols, 0.0);
// Helper function to access 2D arrays stored as 1D
auto idx = [&](int row, int col) { return col * nrows + row; };
// Start timing
auto start_time = std::chrono::high_resolution_clock::now();
// OpenACC Parallel Region
#pragma acc parallel default(present) create(cp[:nrows*ncols], dp[:nrows*ncols]) async(acc_queue)
{
// Initialize c-prime and d-prime
#pragma acc loop gang(static: 1) vector
for (int jc = startidx; jc < endidx; ++jc) {
cp[idx(jc, slev)] = c[idx(jc, slev)] / b[idx(jc, slev)];
dp[idx(jc, slev)] = d[idx(jc, slev)] / b[idx(jc, slev)];
}
// Solve for vectors c-prime and d-prime
#pragma acc loop seq
for (int i = slev + 1; i < elev; ++i) {
#pragma acc loop gang(static: 1) vector
for (int jc = startidx; jc < endidx; ++jc) {
double m = 1.0 / (b[idx(jc, i)] - cp[idx(jc, i - 1)] * a[idx(jc, i)]);
cp[idx(jc, i)] = c[idx(jc, i)] * m;
dp[idx(jc, i)] = (d[idx(jc, i)] - dp[idx(jc, i - 1)] * a[idx(jc, i)]) * m;
}
}
// Initialize varout
#pragma acc loop gang(static: 1) vector
for (int jc = startidx; jc < endidx; ++jc) {
varout[idx(jc, elev-1)] = dp[idx(jc, elev-1)];
}
// Solve for varout from the vectors c-prime and d-prime
#pragma acc loop seq
for (int i = elev - 2; i >= slev; --i) {
#pragma acc loop gang(static: 1) vector
for (int jc = startidx; jc < endidx; ++jc) {
varout[idx(jc, i)] = dp[idx(jc, i)] - cp[idx(jc, i)] * varout[idx(jc, i + 1)];
}
}
}
printf("tdma_solver_vec: completed using C++\n");
// Wait for OpenACC asynchronous operations to complete if acc_queue is not optional
if (opt_acc_queue == -1) {
#pragma acc wait(acc_queue)
}
// End timing
auto end_time = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed_time = end_time - start_time;
std::cout << "Elapsed time for tdma_solver_vec (C++): " << elapsed_time.count() << " seconds" << std::endl;
}
}
......@@ -24,7 +24,7 @@ MODULE TEST_mo_math_utilities
PRIVATE
PUBLIC :: TEST_cc2gc, TEST_gc2cc, TEST_cc2tv, TEST_gvec2cvec, TEST_cvec2gvec, TEST_tdma_solver_vec
REAL(wp), PARAMETER :: earth_radius = 6.371229e6_wp !! [m] average radius
REAL(wp), PARAMETER :: earth_radius = 6.371229e6_wp !! [m] average radius
CONTAINS
......