From 6416163ab3904752c0e7364e41046a285a2fb4a1 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 3 Jan 2025 09:34:24 +0100 Subject: [PATCH] replaced the lambda function for calculating combined index with a macro function fixed a bug --- src/support/mo_math_utilities.cpp | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp index f82cd27..c2b46dc 100644 --- a/src/support/mo_math_utilities.cpp +++ b/src/support/mo_math_utilities.cpp @@ -17,11 +17,7 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, double* cp = new double[nrows * ncols]; double* dp = new double[nrows * ncols]; - // Helper function to access 2D arrays stored as 1D - auto idx = [&](int row, int col) { return col * nrows + row; }; - - // Start timing - auto start_time = std::chrono::high_resolution_clock::now(); + #define IDX(row, col) ((col) * nrows + (row)) // performs better than lambda function // OpenACC Parallel Region #pragma acc parallel default(present) create(cp[:nrows*ncols], dp[:nrows*ncols]) async(acc_queue) @@ -29,8 +25,8 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, // Initialize c-prime and d-prime #pragma acc loop gang(static: 1) vector for (int jc = startidx; jc < endidx; ++jc) { - cp[idx(jc, slev)] = c[idx(jc, slev)] / b[idx(jc, slev)]; - dp[idx(jc, slev)] = d[idx(jc, slev)] / b[idx(jc, slev)]; + cp[IDX(jc, slev)] = c[IDX(jc, slev)] / b[IDX(jc, slev)]; + dp[IDX(jc, slev)] = d[IDX(jc, slev)] / b[IDX(jc, slev)]; } // Solve for vectors c-prime and d-prime @@ -38,16 +34,16 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, for (int i = slev + 1; i < elev; ++i) { #pragma acc loop gang(static: 1) vector for (int jc = startidx; jc < endidx; ++jc) { - double m = 1.0 / (b[idx(jc, i)] - cp[idx(jc, i - 1)] * a[idx(jc, i)]); - cp[idx(jc, i)] = c[idx(jc, i)] * m; - dp[idx(jc, i)] = (d[idx(jc, i)] - dp[idx(jc, i - 1)] * a[idx(jc, i)]) * m; + double m = 1.0 / (b[IDX(jc, i)] - cp[IDX(jc, i - 1)] * a[IDX(jc, i)]); + cp[IDX(jc, i)] = c[IDX(jc, i)] * m; + dp[IDX(jc, i)] = (d[IDX(jc, i)] - dp[IDX(jc, i - 1)] * a[IDX(jc, i)]) * m; } } // Initialize varout #pragma acc loop gang(static: 1) vector for (int jc = startidx; jc < endidx; ++jc) { - varout[idx(jc, elev-1)] = dp[idx(jc, elev-1)]; + varout[IDX(jc, elev-1)] = dp[IDX(jc, elev-1)]; } // Solve for varout from the vectors c-prime and d-prime @@ -55,7 +51,7 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, for (int i = elev - 2; i >= slev; --i) { #pragma acc loop gang(static: 1) vector for (int jc = startidx; jc < endidx; ++jc) { - varout[idx(jc, i)] = dp[idx(jc, i)] - cp[idx(jc, i)] * varout[idx(jc, i + 1)]; + varout[IDX(jc, i)] = dp[IDX(jc, i)] - cp[IDX(jc, i)] * varout[IDX(jc, i + 1)]; } } } @@ -70,6 +66,7 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, // Free memory at the end delete[] cp; delete[] dp; + // End timing auto end_time = std::chrono::high_resolution_clock::now(); std::chrono::duration<double> elapsed_time = end_time - start_time; -- GitLab