Skip to content
Snippets Groups Projects

Draft: adding cpp version of codes

Open Pradipta Samanta requested to merge feature-add-cpp-codes into main
1 unresolved thread
1 file
+ 9
12
Compare changes
  • Side-by-side
  • Inline
@@ -17,11 +17,7 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d,
double* cp = new double[nrows * ncols];
double* dp = new double[nrows * ncols];
// Helper function to access 2D arrays stored as 1D
auto idx = [&](int row, int col) { return col * nrows + row; };
// Start timing
auto start_time = std::chrono::high_resolution_clock::now();
#define IDX(row, col) ((col) * nrows + (row)) // performs better than lambda function
// OpenACC Parallel Region
#pragma acc parallel default(present) create(cp[:nrows*ncols], dp[:nrows*ncols]) async(acc_queue)
@@ -29,8 +25,8 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d,
// Initialize c-prime and d-prime
#pragma acc loop gang(static: 1) vector
for (int jc = startidx; jc < endidx; ++jc) {
cp[idx(jc, slev)] = c[idx(jc, slev)] / b[idx(jc, slev)];
dp[idx(jc, slev)] = d[idx(jc, slev)] / b[idx(jc, slev)];
cp[IDX(jc, slev)] = c[IDX(jc, slev)] / b[IDX(jc, slev)];
dp[IDX(jc, slev)] = d[IDX(jc, slev)] / b[IDX(jc, slev)];
}
// Solve for vectors c-prime and d-prime
@@ -38,16 +34,16 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d,
for (int i = slev + 1; i < elev; ++i) {
#pragma acc loop gang(static: 1) vector
for (int jc = startidx; jc < endidx; ++jc) {
double m = 1.0 / (b[idx(jc, i)] - cp[idx(jc, i - 1)] * a[idx(jc, i)]);
cp[idx(jc, i)] = c[idx(jc, i)] * m;
dp[idx(jc, i)] = (d[idx(jc, i)] - dp[idx(jc, i - 1)] * a[idx(jc, i)]) * m;
double m = 1.0 / (b[IDX(jc, i)] - cp[IDX(jc, i - 1)] * a[IDX(jc, i)]);
cp[IDX(jc, i)] = c[IDX(jc, i)] * m;
dp[IDX(jc, i)] = (d[IDX(jc, i)] - dp[IDX(jc, i - 1)] * a[IDX(jc, i)]) * m;
}
}
// Initialize varout
#pragma acc loop gang(static: 1) vector
for (int jc = startidx; jc < endidx; ++jc) {
varout[idx(jc, elev-1)] = dp[idx(jc, elev-1)];
varout[IDX(jc, elev-1)] = dp[IDX(jc, elev-1)];
}
// Solve for varout from the vectors c-prime and d-prime
@@ -55,7 +51,7 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d,
for (int i = elev - 2; i >= slev; --i) {
#pragma acc loop gang(static: 1) vector
for (int jc = startidx; jc < endidx; ++jc) {
varout[idx(jc, i)] = dp[idx(jc, i)] - cp[idx(jc, i)] * varout[idx(jc, i + 1)];
varout[IDX(jc, i)] = dp[IDX(jc, i)] - cp[IDX(jc, i)] * varout[IDX(jc, i + 1)];
}
}
}
@@ -70,6 +66,7 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d,
// Free memory at the end
delete[] cp;
delete[] dp;
// End timing
auto end_time = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed_time = end_time - start_time;
Loading