Skip to content
Snippets Groups Projects
Commit 6416163a authored by Pradipta Samanta's avatar Pradipta Samanta
Browse files

replaced the lambda function for calculating combined index with a macro function

fixed a bug
parent 5ee6fbde
No related branches found
No related tags found
1 merge request!37Draft: C++ port of horizontal/mo_lib_gradients.F90
...@@ -17,11 +17,7 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, ...@@ -17,11 +17,7 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d,
double* cp = new double[nrows * ncols]; double* cp = new double[nrows * ncols];
double* dp = new double[nrows * ncols]; double* dp = new double[nrows * ncols];
// Helper function to access 2D arrays stored as 1D #define IDX(row, col) ((col) * nrows + (row)) // performs better than lambda function
auto idx = [&](int row, int col) { return col * nrows + row; };
// Start timing
auto start_time = std::chrono::high_resolution_clock::now();
// OpenACC Parallel Region // OpenACC Parallel Region
#pragma acc parallel default(present) create(cp[:nrows*ncols], dp[:nrows*ncols]) async(acc_queue) #pragma acc parallel default(present) create(cp[:nrows*ncols], dp[:nrows*ncols]) async(acc_queue)
...@@ -29,8 +25,8 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, ...@@ -29,8 +25,8 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d,
// Initialize c-prime and d-prime // Initialize c-prime and d-prime
#pragma acc loop gang(static: 1) vector #pragma acc loop gang(static: 1) vector
for (int jc = startidx; jc < endidx; ++jc) { for (int jc = startidx; jc < endidx; ++jc) {
cp[idx(jc, slev)] = c[idx(jc, slev)] / b[idx(jc, slev)]; cp[IDX(jc, slev)] = c[IDX(jc, slev)] / b[IDX(jc, slev)];
dp[idx(jc, slev)] = d[idx(jc, slev)] / b[idx(jc, slev)]; dp[IDX(jc, slev)] = d[IDX(jc, slev)] / b[IDX(jc, slev)];
} }
// Solve for vectors c-prime and d-prime // Solve for vectors c-prime and d-prime
...@@ -38,16 +34,16 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, ...@@ -38,16 +34,16 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d,
for (int i = slev + 1; i < elev; ++i) { for (int i = slev + 1; i < elev; ++i) {
#pragma acc loop gang(static: 1) vector #pragma acc loop gang(static: 1) vector
for (int jc = startidx; jc < endidx; ++jc) { for (int jc = startidx; jc < endidx; ++jc) {
double m = 1.0 / (b[idx(jc, i)] - cp[idx(jc, i - 1)] * a[idx(jc, i)]); double m = 1.0 / (b[IDX(jc, i)] - cp[IDX(jc, i - 1)] * a[IDX(jc, i)]);
cp[idx(jc, i)] = c[idx(jc, i)] * m; cp[IDX(jc, i)] = c[IDX(jc, i)] * m;
dp[idx(jc, i)] = (d[idx(jc, i)] - dp[idx(jc, i - 1)] * a[idx(jc, i)]) * m; dp[IDX(jc, i)] = (d[IDX(jc, i)] - dp[IDX(jc, i - 1)] * a[IDX(jc, i)]) * m;
} }
} }
// Initialize varout // Initialize varout
#pragma acc loop gang(static: 1) vector #pragma acc loop gang(static: 1) vector
for (int jc = startidx; jc < endidx; ++jc) { for (int jc = startidx; jc < endidx; ++jc) {
varout[idx(jc, elev-1)] = dp[idx(jc, elev-1)]; varout[IDX(jc, elev-1)] = dp[IDX(jc, elev-1)];
} }
// Solve for varout from the vectors c-prime and d-prime // Solve for varout from the vectors c-prime and d-prime
...@@ -55,7 +51,7 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, ...@@ -55,7 +51,7 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d,
for (int i = elev - 2; i >= slev; --i) { for (int i = elev - 2; i >= slev; --i) {
#pragma acc loop gang(static: 1) vector #pragma acc loop gang(static: 1) vector
for (int jc = startidx; jc < endidx; ++jc) { for (int jc = startidx; jc < endidx; ++jc) {
varout[idx(jc, i)] = dp[idx(jc, i)] - cp[idx(jc, i)] * varout[idx(jc, i + 1)]; varout[IDX(jc, i)] = dp[IDX(jc, i)] - cp[IDX(jc, i)] * varout[IDX(jc, i + 1)];
} }
} }
} }
...@@ -70,6 +66,7 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d, ...@@ -70,6 +66,7 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d,
// Free memory at the end // Free memory at the end
delete[] cp; delete[] cp;
delete[] dp; delete[] dp;
// End timing // End timing
auto end_time = std::chrono::high_resolution_clock::now(); auto end_time = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed_time = end_time - start_time; std::chrono::duration<double> elapsed_time = end_time - start_time;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment