From 9a77e6f1a4d798c2fce7b3746431fb0478d125d2 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 24 Mar 2025 21:53:24 +0100 Subject: [PATCH 01/34] made the cpp codes run on GPUs and modified the tests accordingly removed some comments --- .gitlab-ci.yml | 5 +- .../mo_lib_interpolation_scalar.cpp | 5 +- test/c/CMakeLists.txt | 12 +- test/c/test_horizontal_div.cpp | 481 +++++++++++------- 4 files changed, 304 insertions(+), 199 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9837066..c12492b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -65,9 +65,10 @@ nvhpc_cpu: variables: SCHEDULER_PARAMETERS: "$SLURM_OPTIONS_CPU $SLURM_NTASKS" script: - - module load git gcc/11.2.0-gcc-11.2.0 nvhpc/22.5-gcc-11.2.0 + - module load git gcc/11.2.0-gcc-11.2.0 nvhpc/24.7-gcc-11.2.0 - mkdir nvhpc_cpu - cd nvhpc_cpu + - export LD_LIBRARY_PATH=/sw/spack-levante/gcc-11.2.0-bcn7mb/lib64:$LD_LIBRARY_PATH - /sw/spack-levante/cmake-3.23.1-q5kzz6/bin/cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_COMPILER=nvc -DCMAKE_Fortran_COMPILER=nvfortran - make VERBOSE=1 - make test @@ -111,7 +112,7 @@ OpenMP_gcc: - module load git gcc/11.2.0-gcc-11.2.0 - mkdir openmp_gcc - cd openmp_gcc - - /sw/spack-levante/cmake-3.23.1-q5kzz6/bin/cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_COMPILER=gcc -DCMAKE_Fortran_COMPILER=gfortran -DIM_ENABLE_OPENMP=ON + - /sw/spack-levante/cmake-3.23.1-q5kzz6/bin/cmake .. 
-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_COMPILER=gcc -DCMAKE_Fortran_COMPILER=gfortran -DIM_ENABLE_GPU=nvidia-sm80 -DIM_ENABLE_OPENMP=ON - make VERBOSE=1 - make test tags: diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index 9e4e6c5..8910cb2 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -136,10 +136,7 @@ void cells2edges_scalar_lib(const T *p_cell_in, const int *edge_cell_idx, p_edge_out_view(je, jk, jb) = p_cell_in_view( iidx_view(je, jb, 1), jk, iblk_view(je, jb, 1)); } else { - std::cerr << "mo_interpolation:cells2edges_scalar_lib: error in " - "lateral boundary filling" - << std::endl; - std::exit(EXIT_FAILURE); + Kokkos::abort("mo_interpolation:cells2edges_scalar_lib: error in lateral boundary filling"); } }); Kokkos::fence(); diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index 90ab1e3..0b42439 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -33,12 +33,12 @@ endif() set(SOURCES main.cpp test_horizontal_div.cpp - test_horizontal_recon.cpp - test_horizontal_rot.cpp - test_tdma_solver.cpp - test_interpolation_vector.cpp - test_intp_rbf.cpp - test_interpolation_scalar.cpp + # test_horizontal_recon.cpp + # test_horizontal_rot.cpp + # test_tdma_solver.cpp + # test_interpolation_vector.cpp + # test_intp_rbf.cpp + # test_interpolation_scalar.cpp ) # Create the test executable from your test files, including main.cpp. add_executable(iconmath_test_c ${SOURCES}) diff --git a/test/c/test_horizontal_div.cpp b/test/c/test_horizontal_div.cpp index 596d19e..2d06bc0 100644 --- a/test/c/test_horizontal_div.cpp +++ b/test/c/test_horizontal_div.cpp @@ -20,7 +20,8 @@ #include <support/mo_lib_loopindices.hpp> /// Test class for the horizontal divergence tests. 
Templated for the ValueType -template <typename ValueType> class HorizontalDivTest : public ::testing::Test { +template <typename ValueType> +class HorizontalDivTest : public ::testing::Test { protected: static constexpr int nproma = 3; // inner loop length static constexpr int nlev = 2; // number of vertical levels @@ -36,43 +37,51 @@ protected: std::vector<int> elev; bool lacc = false; // Not using ACC-specific behavior. - std::vector<ValueType> vec_e; - std::vector<int> cell_edge_idx; - std::vector<int> cell_edge_blk; - std::vector<ValueType> geofac_div; - std::vector<ValueType> div_vec_c; - std::vector<ValueType> f4din; - std::vector<ValueType> f4dout; + // Kokkos::View objects are allocated in the default execution space's memory space + // (device memory on GPU builds); host code accesses them through mirror views + using exec_space = Kokkos::DefaultExecutionSpace; + using memory_space = exec_space::memory_space; - // Followings are needed in HorizontalDivAvgTest - std::vector<int> cell_neighbor_idx; - std::vector<int> cell_neighbor_blk; - std::vector<ValueType> avg_coeff; - std::vector<ValueType> opt_in2; - std::vector<ValueType> opt_out2; + // Views for the test data. 
All the data is assigned as one-dimensional arrays + Kokkos::View<ValueType*, memory_space> vec_e; + Kokkos::View<int*, memory_space> cell_edge_idx; + Kokkos::View<int*, memory_space> cell_edge_blk; + Kokkos::View<ValueType*, memory_space> geofac_div; + Kokkos::View<ValueType*, memory_space> div_vec_c; + Kokkos::View<ValueType*, memory_space> f4din; + Kokkos::View<ValueType*, memory_space> f4dout; - HorizontalDivTest() { + // Followings are needed in HorizontalDivAvgTest + Kokkos::View<int*, memory_space> cell_neighbor_idx; + Kokkos::View<int*, memory_space> cell_neighbor_blk; + Kokkos::View<ValueType*, memory_space> avg_coeff; + Kokkos::View<ValueType*, memory_space> opt_in2; + Kokkos::View<ValueType*, memory_space> opt_out2; + + HorizontalDivTest() + : vec_e("vec_e", dim_combine(nproma, nlev, nblks_e)), + cell_edge_idx("cell_edge_idx", dim_combine(nproma, nblks_c, 3)), + cell_edge_blk("cell_edge_blk", dim_combine(nproma, nblks_c, 3)), + geofac_div("geofac_div", dim_combine(nproma, 3, nblks_c)), + div_vec_c("div_vec_c", dim_combine(nproma, nlev, nblks_c)), + f4din("f4din", dim_combine(nproma, nlev, nblks_e, dim4d)), + f4dout("f4dout", dim_combine(nproma, nlev, nblks_c, dim4d)), + cell_neighbor_idx("cell_neighbor_idx", dim_combine(nproma, nblks_c, 3)), + cell_neighbor_blk("cell_neighbor_blk", dim_combine(nproma, nblks_c, 3)), + avg_coeff("avg_coeff", dim_combine(nproma, nlev, nblks_c)), + opt_in2("opt_in2", dim_combine(nproma, nlev, nblks_e)), + opt_out2("opt_out2", dim_combine(nproma, nlev, nblks_c)) + { + + // We keep slev and elev as std::vector since they are small and used only on the host. slev.resize(dim4d, 0); elev.resize(dim4d, nlev); // Full vertical range (0 .. 
nlev-1) - vec_e.resize(dim_combine(nproma, nlev, nblks_e)); - cell_edge_idx.resize(dim_combine(nproma, nblks_c, 3)); - cell_edge_blk.resize(dim_combine(nproma, nblks_c, 3)); - geofac_div.resize(dim_combine(nproma, 3, nblks_c)); - div_vec_c.resize(dim_combine(nproma, nlev, nblks_c)); - f4din.resize(dim_combine(nproma, nlev, nblks_e, dim4d)); - f4dout.resize(dim_combine(nproma, nlev, nblks_c, dim4d)); - cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, 3)); - cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, 3)); - avg_coeff.resize(dim_combine(nproma, 4, nblks_c)); - opt_in2.resize(dim_combine(nproma, nlev, nblks_e)); - opt_out2.resize(dim_combine(nproma, nlev, nblks_c)); } }; /// ValueTypes which the divrot tests should run with typedef ::testing::Types<float, double> ValueTypes; - TYPED_TEST_SUITE(HorizontalDivTest, ValueTypes); TYPED_TEST(HorizontalDivTest, TestDiv3DSpecific) { @@ -86,34 +95,46 @@ TYPED_TEST(HorizontalDivTest, TestDiv3DSpecific) { const auto &geofac_div_at = at<nproma, 3, nblks_c>; const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; - // Initialization with specific values + // create mirror views to store data on the host + auto vec_e_h = Kokkos::create_mirror_view(this->vec_e); + auto cell_edge_idx_h = Kokkos::create_mirror_view(this->cell_edge_idx); + auto cell_edge_blk_h = Kokkos::create_mirror_view(this->cell_edge_blk); + auto geofac_div_h = Kokkos::create_mirror_view(this->geofac_div); + auto div_vec_c_h = Kokkos::create_mirror_view(this->div_vec_c); + + // Initialize the arrays with the same patterns as before. 
for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + vec_e_h[vec_e_at(i, k, 0)] = static_cast<TypeParam>((i + 1) * (k + 1)); } - // Set edge indices to point to specific cells (including self) - this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; - this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; - this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + cell_edge_idx_h[cell_edge_at(i, 0, 0)] = i; + cell_edge_idx_h[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + cell_edge_idx_h[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; // All edges are in the same block for this test for (int j = 0; j < 3; ++j) { - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + cell_edge_blk_h[cell_edge_at(i, 0, j)] = 0; } - // Geometric factors - this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; - this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; - this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + geofac_div_h[geofac_div_at(i, 0, 0)] = static_cast<TypeParam>(0.5); + geofac_div_h[geofac_div_at(i, 1, 0)] = static_cast<TypeParam>(0.3); + geofac_div_h[geofac_div_at(i, 2, 0)] = static_cast<TypeParam>(0.2); // Initialize div_vec_c to zero for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; + div_vec_c_h[div_vec_c_at(i, k, 0)] = static_cast<TypeParam>(0.0); } } - - // Call the div3d function + // Copy the initialized data back to the device memory (or unified memory, which in some + // cases may be a no-op if already accessible on the host). + Kokkos::deep_copy(this->vec_e, vec_e_h); + Kokkos::deep_copy(this->cell_edge_idx, cell_edge_idx_h); + Kokkos::deep_copy(this->cell_edge_blk, cell_edge_blk_h); + Kokkos::deep_copy(this->geofac_div, geofac_div_h); + Kokkos::deep_copy(this->div_vec_c, div_vec_c_h); + + // Call the div3d function using the device pointers from the Views. 
div3d<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), this->cell_edge_blk.data(), this->geofac_div.data(), this->div_vec_c.data(), this->i_startblk, this->i_endblk, @@ -121,12 +142,14 @@ TYPED_TEST(HorizontalDivTest, TestDiv3DSpecific) { this->elev[0], this->nproma, this->lacc, this->nlev, this->nblks_c, this->nblks_e); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.4, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.1, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.2, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.2, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6); + Kokkos::deep_copy(div_vec_c_h, this->div_vec_c); + + EXPECT_NEAR(div_vec_c_h(div_vec_c_at(0, 0, 0)), static_cast<TypeParam>(1.7), 1e-6); + EXPECT_NEAR(div_vec_c_h(div_vec_c_at(0, 1, 0)), static_cast<TypeParam>(3.4), 1e-6); + EXPECT_NEAR(div_vec_c_h(div_vec_c_at(1, 0, 0)), static_cast<TypeParam>(2.1), 1e-6); + EXPECT_NEAR(div_vec_c_h(div_vec_c_at(1, 1, 0)), static_cast<TypeParam>(4.2), 1e-6); + EXPECT_NEAR(div_vec_c_h(div_vec_c_at(2, 0, 0)), static_cast<TypeParam>(2.2), 1e-6); + EXPECT_NEAR(div_vec_c_h(div_vec_c_at(2, 1, 0)), static_cast<TypeParam>(4.4), 1e-6); } TYPED_TEST(HorizontalDivTest, TestDiv3DRandom) { @@ -140,43 +163,52 @@ TYPED_TEST(HorizontalDivTest, TestDiv3DRandom) { const auto &geofac_div_at = at<nproma, 3, nblks_c>; const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; - // Set up random number generators + //create mirror views to store data on the host + auto vec_e_h = Kokkos::create_mirror_view(this->vec_e); + auto cell_edge_idx_h = Kokkos::create_mirror_view(this->cell_edge_idx); + auto cell_edge_blk_h = Kokkos::create_mirror_view(this->cell_edge_blk); + auto geofac_div_h = Kokkos::create_mirror_view(this->geofac_div); + auto div_vec_c_h = Kokkos::create_mirror_view(this->div_vec_c); + + // Initialize the arrays with random 
values. std::random_device rd; std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - // Initialization with random values for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + vec_e_h[vec_e_at(i, k, 0)] = real_distrib(gen); } - // Set random edge indices for (int j = 0; j < 3; ++j) { - this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - this->cell_edge_blk[cell_edge_at(i, 0, j)] = - 0; // Keep in same block for simplicity + cell_edge_idx_h[cell_edge_at(i, 0, j)] = int_distrib(gen); + cell_edge_blk_h[cell_edge_at(i, 0, j)] = 0; } - // Random geometric factors for (int j = 0; j < 3; ++j) { - this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + geofac_div_h[geofac_div_at(i, j, 0)] = real_distrib(gen); } - // Initialize div_vec_c to random values for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + div_vec_c_h[div_vec_c_at(i, k, 0)] = real_distrib(gen); } } - // Call the div3d function - div3d<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->div_vec_c.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); + Kokkos::deep_copy(this->vec_e, vec_e_h); + Kokkos::deep_copy(this->cell_edge_idx, cell_edge_idx_h); + Kokkos::deep_copy(this->cell_edge_blk, cell_edge_blk_h); + Kokkos::deep_copy(this->geofac_div, geofac_div_h); + Kokkos::deep_copy(this->div_vec_c, div_vec_c_h); + + div3d<TypeParam>( + this->vec_e.data(), this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->geofac_div.data(), this->div_vec_c.data(), this->i_startblk, + this->i_endblk, 
this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, this->nblks_c, + this->nblks_e); + + Kokkos::deep_copy(div_vec_c_h, this->div_vec_c); // Calculate reference values separately and verify results std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); @@ -189,18 +221,18 @@ TYPED_TEST(HorizontalDivTest, TestDiv3DRandom) { for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; + vec_e_h[vec_e_at( + cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 0)])] * + geofac_div_h[geofac_div_at(jc, 0, jb)] + + vec_e_h[vec_e_at( + cell_edge_idx_h[cell_edge_at(jc, jb, 1)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 1)])] * + geofac_div_h[geofac_div_at(jc, 1, jb)] + + vec_e_h[vec_e_at( + cell_edge_idx_h[cell_edge_at(jc, jb, 2)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 2)])] * + geofac_div_h[geofac_div_at(jc, 2, jb)]; } } } @@ -208,7 +240,7 @@ TYPED_TEST(HorizontalDivTest, TestDiv3DRandom) { // Verify results for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(i, k, 0)], ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) << "Results differ at i=" << i << ", k=" << k; } @@ -229,36 +261,53 @@ TYPED_TEST(HorizontalDivTest, TestDiv3D2FSpecific) { const auto &f4d_at = 
at<nproma, nlev, nblks_e, dim4d>; const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + // Create mirror views to store data on the host + auto vec_e_h = Kokkos::create_mirror_view(this->vec_e); + auto cell_edge_idx_h = Kokkos::create_mirror_view(this->cell_edge_idx); + auto cell_edge_blk_h = Kokkos::create_mirror_view(this->cell_edge_blk); + auto geofac_div_h = Kokkos::create_mirror_view(this->geofac_div); + auto div_vec_c_h = Kokkos::create_mirror_view(this->div_vec_c); + auto f4din_h = Kokkos::create_mirror_view(this->f4din); + auto f4dout_h = Kokkos::create_mirror_view(this->f4dout); + // Initialization with specific values for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern - this->f4din[f4d_at(i, k, 0, 0)] = - (i + 1) * (k + 2); // Different pattern for second field + vec_e_h[vec_e_at(i, k, 0)] = static_cast<TypeParam>((i + 1) * (k + 1)); // Simple pattern + f4din_h[f4d_at(i, k, 0, 0)] = static_cast<TypeParam>((i + 1) * (k + 2)); // Different pattern for second field } // Set edge indices to point to specific cells (including self) - this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; - this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; - this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + cell_edge_idx_h[cell_edge_at(i, 0, 0)] = i; + cell_edge_idx_h[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + cell_edge_idx_h[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; // All edges are in the same block for this test for (int j = 0; j < 3; ++j) { - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + cell_edge_blk_h[cell_edge_at(i, 0, j)] = 0; } // Geometric factors - this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; - this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; - this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + geofac_div_h[geofac_div_at(i, 0, 0)] = static_cast<TypeParam>(0.5); + geofac_div_h[geofac_div_at(i, 1, 0)] = static_cast<TypeParam>(0.3); + 
geofac_div_h[geofac_div_at(i, 2, 0)] = static_cast<TypeParam>(0.2); // Initialize div_vec_c and f4dout to zero for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; - this->f4dout[f4dout_at(i, k, 0, 0)] = 0.0; + div_vec_c_h[div_vec_c_at(i, k, 0)] = static_cast<TypeParam>(0.0); + f4dout_h[f4dout_at(i, k, 0, 0)] = static_cast<TypeParam>(0.0); } } + // Copy initialized data to device + Kokkos::deep_copy(this->vec_e, vec_e_h); + Kokkos::deep_copy(this->cell_edge_idx, cell_edge_idx_h); + Kokkos::deep_copy(this->cell_edge_blk, cell_edge_blk_h); + Kokkos::deep_copy(this->geofac_div, geofac_div_h); + Kokkos::deep_copy(this->div_vec_c, div_vec_c_h); + Kokkos::deep_copy(this->f4din, f4din_h); + Kokkos::deep_copy(this->f4dout, f4dout_h); + // Call the div3d_2field function div3d_2field<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), this->cell_edge_blk.data(), this->geofac_div.data(), @@ -268,21 +317,25 @@ TYPED_TEST(HorizontalDivTest, TestDiv3D2FSpecific) { this->elev[0], this->nproma, this->lacc, this->nlev, this->nblks_c, this->nblks_e); + // Copy results back to host for verification + Kokkos::deep_copy(div_vec_c_h, this->div_vec_c); + Kokkos::deep_copy(f4dout_h, this->f4dout); + // Check first field (same as in div3d test) - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.4, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.1, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.2, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.2, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(0, 0, 0)], static_cast<TypeParam>(1.7), 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(0, 1, 0)], static_cast<TypeParam>(3.4), 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(1, 0, 0)], static_cast<TypeParam>(2.1), 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(1, 1, 0)], 
static_cast<TypeParam>(4.2), 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(2, 0, 0)], static_cast<TypeParam>(2.2), 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(2, 1, 0)], static_cast<TypeParam>(4.4), 1e-6); // Check second field (expected values calculated manually) - EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 0)], 3.4, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 0)], 5.1, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 0)], 4.2, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 0)], 6.3, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 0)], 4.4, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 0)], 6.6, 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(0, 0, 0, 0)], static_cast<TypeParam>(3.4), 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(0, 1, 0, 0)], static_cast<TypeParam>(5.1), 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(1, 0, 0, 0)], static_cast<TypeParam>(4.2), 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(1, 1, 0, 0)], static_cast<TypeParam>(6.3), 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(2, 0, 0, 0)], static_cast<TypeParam>(4.4), 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(2, 1, 0, 0)], static_cast<TypeParam>(6.6), 1e-6); } TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) { @@ -299,6 +352,15 @@ TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) { const auto &f4d_at = at<nproma, nlev, nblks_e, dim4d>; const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + // Create mirror views to store data on the host + auto vec_e_h = Kokkos::create_mirror_view(this->vec_e); + auto cell_edge_idx_h = Kokkos::create_mirror_view(this->cell_edge_idx); + auto cell_edge_blk_h = Kokkos::create_mirror_view(this->cell_edge_blk); + auto geofac_div_h = Kokkos::create_mirror_view(this->geofac_div); + auto div_vec_c_h = Kokkos::create_mirror_view(this->div_vec_c); + auto f4din_h = Kokkos::create_mirror_view(this->f4din); + auto f4dout_h = Kokkos::create_mirror_view(this->f4dout); + // Set up random number generators std::random_device rd; std::mt19937 gen(rd()); @@ -308,37 +370,48 @@ 
TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) { // Initialization with random values for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); - this->f4din[f4d_at(i, k, 0, 0)] = real_distrib(gen); + vec_e_h[vec_e_at(i, k, 0)] = real_distrib(gen); + f4din_h[f4d_at(i, k, 0, 0)] = real_distrib(gen); } // Set random edge indices for (int j = 0; j < 3; ++j) { - this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - this->cell_edge_blk[cell_edge_at(i, 0, j)] = - 0; // Keep in same block for simplicity + cell_edge_idx_h[cell_edge_at(i, 0, j)] = int_distrib(gen); + cell_edge_blk_h[cell_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity } // Random geometric factors for (int j = 0; j < 3; ++j) { - this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + geofac_div_h[geofac_div_at(i, j, 0)] = real_distrib(gen); } // Initialize div_vec_c and f4dout to random values for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); - this->f4dout[f4dout_at(i, k, 0, 0)] = real_distrib(gen); + div_vec_c_h[div_vec_c_at(i, k, 0)] = real_distrib(gen); + f4dout_h[f4dout_at(i, k, 0, 0)] = real_distrib(gen); } } + // Copy initialized data to device + Kokkos::deep_copy(this->vec_e, vec_e_h); + Kokkos::deep_copy(this->cell_edge_idx, cell_edge_idx_h); + Kokkos::deep_copy(this->cell_edge_blk, cell_edge_blk_h); + Kokkos::deep_copy(this->geofac_div, geofac_div_h); + Kokkos::deep_copy(this->div_vec_c, div_vec_c_h); + Kokkos::deep_copy(this->f4din, f4din_h); + Kokkos::deep_copy(this->f4dout, f4dout_h); + // Call the div3d_2field function - div3d_2field<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->div_vec_c.data(), this->f4din.data(), - this->f4dout.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, 
this->nlev, - this->nblks_c, this->nblks_e); + div3d_2field<TypeParam>( + this->vec_e.data(), this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->geofac_div.data(), this->div_vec_c.data(), this->f4din.data(), + this->f4dout.data(), this->i_startblk, this->i_endblk, this->i_startidx_in, + this->i_endidx_in, this->slev[0], this->elev[0], this->nproma, this->lacc, + this->nlev, this->nblks_c, this->nblks_e); + + // Copy results back to host for verification + Kokkos::deep_copy(div_vec_c_h, this->div_vec_c); + Kokkos::deep_copy(f4dout_h, this->f4dout); // Calculate reference values separately and verify results std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); @@ -353,33 +426,33 @@ TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) { for (int jc = i_startidx; jc < i_endidx; ++jc) { // Calculate reference value for first field ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; + vec_e_h[vec_e_at( + cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 0)])] * + geofac_div_h[geofac_div_at(jc, 0, jb)] + + vec_e_h[vec_e_at( + cell_edge_idx_h[cell_edge_at(jc, jb, 1)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 1)])] * + geofac_div_h[geofac_div_at(jc, 1, jb)] + + vec_e_h[vec_e_at( + cell_edge_idx_h[cell_edge_at(jc, jb, 2)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 2)])] * + geofac_div_h[geofac_div_at(jc, 2, jb)]; // Calculate reference value for second field ref_f4dout[f4dout_at(jc, jk, jb, 0)] = - 
this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)], - 0)] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)], - 0)] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)], - 0)] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; + f4din_h[f4d_at( + cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 0)], 0)] * + geofac_div_h[geofac_div_at(jc, 0, jb)] + + f4din_h[f4d_at( + cell_edge_idx_h[cell_edge_at(jc, jb, 1)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 1)], 0)] * + geofac_div_h[geofac_div_at(jc, 1, jb)] + + f4din_h[f4d_at( + cell_edge_idx_h[cell_edge_at(jc, jb, 2)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 2)], 0)] * + geofac_div_h[geofac_div_at(jc, 2, jb)]; } } } @@ -387,7 +460,7 @@ TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) { // Verify results for first field for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(i, k, 0)], ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) << "First field results differ at i=" << i << ", k=" << k; } @@ -396,7 +469,7 @@ TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) { // Verify results for second field for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->f4dout[f4dout_at(i, k, 0, 0)], + EXPECT_NEAR(f4dout_h[f4dout_at(i, k, 0, 0)], ref_f4dout[f4dout_at(i, k, 0, 0)], 1e-5) << "Second field results differ at i=" << i << ", k=" << k; } @@ -415,42 +488,59 @@ TYPED_TEST(HorizontalDivTest, TestDiv4DSpecific) { const auto &f4din_at = at<nproma, nlev, nblks_e, dim4d>; const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + // Create mirror views to store data on the host + 
auto cell_edge_idx_h = Kokkos::create_mirror_view(this->cell_edge_idx); + auto cell_edge_blk_h = Kokkos::create_mirror_view(this->cell_edge_blk); + auto geofac_div_h = Kokkos::create_mirror_view(this->geofac_div); + auto f4din_h = Kokkos::create_mirror_view(this->f4din); + auto f4dout_h = Kokkos::create_mirror_view(this->f4dout); + // Initialization for (int i = 0; i < nproma; ++i) { for (int j = 0; j < 3; ++j) { - this->cell_edge_idx[cell_edge_at(i, 0, j)] = (i + j) % nproma; - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; - this->geofac_div[geofac_div_at(i, j, 0)] = 0.1 * (j + 1); + cell_edge_idx_h[cell_edge_at(i, 0, j)] = (i + j) % nproma; + cell_edge_blk_h[cell_edge_at(i, 0, j)] = 0; + geofac_div_h[geofac_div_at(i, j, 0)] = static_cast<TypeParam>(0.1 * (j + 1)); } for (int k = 0; k < nlev; ++k) { for (int d = 0; d < dim4d; ++d) { - this->f4din[f4din_at(i, k, 0, d)] = 1.0 + i + k + d; - this->f4dout[f4dout_at(i, k, 0, d)] = 0.0; + f4din_h[f4din_at(i, k, 0, d)] = static_cast<TypeParam>(1.0 + i + k + d); + f4dout_h[f4dout_at(i, k, 0, d)] = static_cast<TypeParam>(0.0); } } } + // Copy initialized data to device + Kokkos::deep_copy(this->cell_edge_idx, cell_edge_idx_h); + Kokkos::deep_copy(this->cell_edge_blk, cell_edge_blk_h); + Kokkos::deep_copy(this->geofac_div, geofac_div_h); + Kokkos::deep_copy(this->f4din, f4din_h); + Kokkos::deep_copy(this->f4dout, f4dout_h); + // Test function - div4d<TypeParam>(this->cell_edge_idx.data(), this->cell_edge_blk.data(), - this->geofac_div.data(), this->f4din.data(), - this->f4dout.data(), this->dim4d, this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, - this->slev.data(), this->elev.data(), this->nproma, - this->lacc, this->nlev, this->nblks_c, this->nblks_e); - - EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 0)], 1.4, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 0)], 1.1, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 0)], 1.1, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 0)], 
2.0, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 0)], 1.7, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 0)], 1.7, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 1)], 2.0, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 1)], 1.7, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 1)], 1.7, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 1)], 2.6, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 1)], 2.3, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 1)], 2.3, 1e-6); + div4d<TypeParam>( + this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->geofac_div.data(), this->f4din.data(), this->f4dout.data(), + this->dim4d, this->i_startblk, this->i_endblk, this->i_startidx_in, + this->i_endidx_in, this->slev.data(), this->elev.data(), this->nproma, + this->lacc, this->nlev, this->nblks_c, this->nblks_e); + + // Copy results back to host for verification + Kokkos::deep_copy(f4dout_h, this->f4dout); + + EXPECT_NEAR(f4dout_h[f4dout_at(0, 0, 0, 0)], static_cast<TypeParam>(1.4), 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(1, 0, 0, 0)], static_cast<TypeParam>(1.1), 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(2, 0, 0, 0)], static_cast<TypeParam>(1.1), 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(0, 1, 0, 0)], static_cast<TypeParam>(2.0), 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(1, 1, 0, 0)], static_cast<TypeParam>(1.7), 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(2, 1, 0, 0)], static_cast<TypeParam>(1.7), 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(0, 0, 0, 1)], static_cast<TypeParam>(2.0), 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(1, 0, 0, 1)], static_cast<TypeParam>(1.7), 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(2, 0, 0, 1)], static_cast<TypeParam>(1.7), 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(0, 1, 0, 1)], static_cast<TypeParam>(2.6), 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(1, 1, 0, 1)], static_cast<TypeParam>(2.3), 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(2, 1, 0, 1)], static_cast<TypeParam>(2.3), 1e-6); } TYPED_TEST(HorizontalDivTest, 
TestDiv4DRandom) { @@ -465,6 +555,13 @@ TYPED_TEST(HorizontalDivTest, TestDiv4DRandom) { const auto &f4din_at = at<nproma, nlev, nblks_e, dim4d>; const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + // Create mirror views to store data on the host + auto cell_edge_idx_h = Kokkos::create_mirror_view(this->cell_edge_idx); + auto cell_edge_blk_h = Kokkos::create_mirror_view(this->cell_edge_blk); + auto geofac_div_h = Kokkos::create_mirror_view(this->geofac_div); + auto f4din_h = Kokkos::create_mirror_view(this->f4din); + auto f4dout_h = Kokkos::create_mirror_view(this->f4dout); + std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution<int> int_distrib(0, nproma - 1); @@ -473,26 +570,36 @@ TYPED_TEST(HorizontalDivTest, TestDiv4DRandom) { // Initialize with random values for (int i = 0; i < nproma; ++i) { for (int j = 0; j < 3; ++j) { - this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; - this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + cell_edge_idx_h[cell_edge_at(i, 0, j)] = int_distrib(gen); + cell_edge_blk_h[cell_edge_at(i, 0, j)] = 0; + geofac_div_h[geofac_div_at(i, j, 0)] = real_distrib(gen); } for (int k = 0; k < nlev; ++k) { for (int d = 0; d < dim4d; ++d) { - this->f4din[f4din_at(i, k, 0, d)] = real_distrib(gen); - this->f4dout[f4dout_at(i, k, 0, d)] = 0.0; + f4din_h[f4din_at(i, k, 0, d)] = real_distrib(gen); + f4dout_h[f4dout_at(i, k, 0, d)] = static_cast<TypeParam>(0.0); } } } + // Copy initialized data to device + Kokkos::deep_copy(this->cell_edge_idx, cell_edge_idx_h); + Kokkos::deep_copy(this->cell_edge_blk, cell_edge_blk_h); + Kokkos::deep_copy(this->geofac_div, geofac_div_h); + Kokkos::deep_copy(this->f4din, f4din_h); + Kokkos::deep_copy(this->f4dout, f4dout_h); + // Test function - div4d<TypeParam>(this->cell_edge_idx.data(), this->cell_edge_blk.data(), - this->geofac_div.data(), this->f4din.data(), - this->f4dout.data(), this->dim4d, 
this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, - this->slev.data(), this->elev.data(), this->nproma, - this->lacc, this->nlev, this->nblks_c, this->nblks_e); + div4d<TypeParam>( + this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->geofac_div.data(), this->f4din.data(), this->f4dout.data(), + this->dim4d, this->i_startblk, this->i_endblk, this->i_startidx_in, + this->i_endidx_in, this->slev.data(), this->elev.data(), this->nproma, + this->lacc, this->nlev, this->nblks_c, this->nblks_e); + + // Copy results back to host for verification + Kokkos::deep_copy(f4dout_h, this->f4dout); // Compute reference result and check for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { @@ -506,13 +613,13 @@ TYPED_TEST(HorizontalDivTest, TestDiv4DRandom) { TypeParam expected = 0.0; for (int je = 0; je < 3; ++je) { expected += - this->f4din[f4din_at( - this->cell_edge_idx[cell_edge_at(jc, jb, je)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, je)], ji)] * - this->geofac_div[geofac_div_at(jc, je, jb)]; + f4din_h[f4din_at( + cell_edge_idx_h[cell_edge_at(jc, jb, je)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, je)], ji)] * + geofac_div_h[geofac_div_at(jc, je, jb)]; } - EXPECT_NEAR(this->f4dout[f4dout_at(jc, jk, jb, ji)], expected, 1e-5) + EXPECT_NEAR(f4dout_h[f4dout_at(jc, jk, jb, ji)], expected, 1e-5) << "Random test fails at jc=" << jc << ", jk=" << jk << ", jb=" << jb << ", ji=" << ji; } @@ -521,8 +628,7 @@ TYPED_TEST(HorizontalDivTest, TestDiv4DRandom) { } } -TYPED_TEST_SUITE(HorizontalDivTest, ValueTypes); - +/* TYPED_TEST(HorizontalDivTest, TestDivAvgSpecific) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; @@ -1068,3 +1174,4 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { } } } +*/ -- GitLab From c20359160d2664885501fa56fab9fb473fa77df6 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 25 Mar 2025 10:44:28 +0100 Subject: [PATCH 02/34] fixed the errors in 
gitlab-ci.yml --- .gitlab-ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c12492b..5e0d58f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -65,7 +65,7 @@ nvhpc_cpu: variables: SCHEDULER_PARAMETERS: "$SLURM_OPTIONS_CPU $SLURM_NTASKS" script: - - module load git gcc/11.2.0-gcc-11.2.0 nvhpc/24.7-gcc-11.2.0 + - module load git gcc/11.2.0-gcc-11.2.0 nvhpc/22.5-gcc-11.2.0 - mkdir nvhpc_cpu - cd nvhpc_cpu - export LD_LIBRARY_PATH=/sw/spack-levante/gcc-11.2.0-bcn7mb/lib64:$LD_LIBRARY_PATH @@ -95,7 +95,7 @@ nvhpc_gpu: - module load git gcc/11.2.0-gcc-11.2.0 nvhpc/24.7-gcc-11.2.0 - mkdir nvhpc_gpu - cd nvhpc_gpu - - /sw/spack-levante/cmake-3.23.1-q5kzz6/bin/cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_COMPILER=nvc -DCMAKE_Fortran_COMPILER=nvfortran -DIM_ENABLE_OPENACC=ON + - /sw/spack-levante/cmake-3.23.1-q5kzz6/bin/cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_COMPILER=nvc -DCMAKE_Fortran_COMPILER=nvfortran -DIM_ENABLE_GPU=nvidia-sm80 -DIM_ENABLE_OPENACC=ON - make VERBOSE=1 - make test tags: @@ -112,7 +112,7 @@ OpenMP_gcc: - module load git gcc/11.2.0-gcc-11.2.0 - mkdir openmp_gcc - cd openmp_gcc - - /sw/spack-levante/cmake-3.23.1-q5kzz6/bin/cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_COMPILER=gcc -DCMAKE_Fortran_COMPILER=gfortran -DIM_ENABLE_GPU=nvidia-sm80 -DIM_ENABLE_OPENMP=ON + - /sw/spack-levante/cmake-3.23.1-q5kzz6/bin/cmake .. 
-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_COMPILER=gcc -DCMAKE_Fortran_COMPILER=gfortran -DIM_ENABLE_OPENMP=ON - make VERBOSE=1 - make test tags: -- GitLab From ff794e816f0e2b817fe154549d1caf990f0d6df1 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 25 Mar 2025 14:44:37 +0100 Subject: [PATCH 03/34] exported ld_Library_path for nvhpc_gpu --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 5e0d58f..6877a94 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -68,7 +68,6 @@ nvhpc_cpu: - module load git gcc/11.2.0-gcc-11.2.0 nvhpc/22.5-gcc-11.2.0 - mkdir nvhpc_cpu - cd nvhpc_cpu - - export LD_LIBRARY_PATH=/sw/spack-levante/gcc-11.2.0-bcn7mb/lib64:$LD_LIBRARY_PATH - /sw/spack-levante/cmake-3.23.1-q5kzz6/bin/cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_COMPILER=nvc -DCMAKE_Fortran_COMPILER=nvfortran - make VERBOSE=1 - make test @@ -95,6 +94,7 @@ nvhpc_gpu: - module load git gcc/11.2.0-gcc-11.2.0 nvhpc/24.7-gcc-11.2.0 - mkdir nvhpc_gpu - cd nvhpc_gpu + - export LD_LIBRARY_PATH=/sw/spack-levante/gcc-11.2.0-bcn7mb/lib64:$LD_LIBRARY_PATH - /sw/spack-levante/cmake-3.23.1-q5kzz6/bin/cmake .. 
-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_COMPILER=nvc -DCMAKE_Fortran_COMPILER=nvfortran -DIM_ENABLE_GPU=nvidia-sm80 -DIM_ENABLE_OPENACC=ON - make VERBOSE=1 - make test -- GitLab From 34ea0a4daf9c4ba9d22a4d7f0d0a8ab81e209fc4 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 25 Mar 2025 18:31:31 +0100 Subject: [PATCH 04/34] corrected memory allocations of two arrays --- src/horizontal/mo_lib_divrot.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/horizontal/mo_lib_divrot.cpp b/src/horizontal/mo_lib_divrot.cpp index d086e8b..67d13fd 100644 --- a/src/horizontal/mo_lib_divrot.cpp +++ b/src/horizontal/mo_lib_divrot.cpp @@ -1093,8 +1093,8 @@ void div_avg(const T *vec_e, const int *cell_neighbor_idx, UnmanagedConstInt3D ieidx(cell_edge_idx, nproma, nblks_c, 3); UnmanagedConstInt3D ieblk(cell_edge_blk, nproma, nblks_c, 3); - UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 4, nblks_e); - UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma, nlev, nblks_c); + UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_e); + UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma, 4, nblks_c); UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c); -- GitLab From 5a60cb4db63aabea0f999527464e17af7c182c44 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 25 Mar 2025 18:46:00 +0100 Subject: [PATCH 05/34] modified rest of the tests in test_horizontal_div --- test/c/test_horizontal_div.cpp | 441 ++++++++++++++++++++------------- 1 file changed, 273 insertions(+), 168 deletions(-) diff --git a/test/c/test_horizontal_div.cpp b/test/c/test_horizontal_div.cpp index 2d06bc0..bc6ea9f 100644 --- a/test/c/test_horizontal_div.cpp +++ b/test/c/test_horizontal_div.cpp @@ -68,7 +68,7 @@ protected: f4dout("f4dout", dim_combine(nproma, nlev, nblks_c, dim4d)), cell_neighbor_idx("cell_neighbor_idx", dim_combine(nproma, nblks_c, 3)), cell_neighbor_blk("cell_neighbor_blk", dim_combine(nproma, nblks_c, 
3)), - avg_coeff("avg_coeff", dim_combine(nproma, nlev, nblks_c)), + avg_coeff("avg_coeff", dim_combine(nproma, 4, nblks_c)), opt_in2("opt_in2", dim_combine(nproma, nlev, nblks_e)), opt_out2("opt_out2", dim_combine(nproma, nlev, nblks_c)) { @@ -628,7 +628,6 @@ TYPED_TEST(HorizontalDivTest, TestDiv4DRandom) { } } -/* TYPED_TEST(HorizontalDivTest, TestDivAvgSpecific) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; @@ -640,6 +639,8 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgSpecific) { const auto &cell_edge_at = at<nproma, nblks_c, 3>; const auto &geofac_div_at = at<nproma, 3, nblks_c>; const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &avg_coeff_at = at<nproma, 4, nblks_c>; // Vectors for additional parameters // Vectors for block and index ranges @@ -653,51 +654,71 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgSpecific) { bool l_limited_area = true; bool l2fields = true; - const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; - const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + // Create mirror views to store data on the host + auto vec_e_h = Kokkos::create_mirror_view(this->vec_e); + auto cell_neighbor_idx_h = Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto cell_neighbor_blk_h = Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto cell_edge_idx_h = Kokkos::create_mirror_view(this->cell_edge_idx); + auto cell_edge_blk_h = Kokkos::create_mirror_view(this->cell_edge_blk); + auto geofac_div_h = Kokkos::create_mirror_view(this->geofac_div); + auto avg_coeff_h = Kokkos::create_mirror_view(this->avg_coeff); + auto div_vec_c_h = Kokkos::create_mirror_view(this->div_vec_c); + auto opt_in2_h = Kokkos::create_mirror_view(this->opt_in2); + auto opt_out2_h = Kokkos::create_mirror_view(this->opt_out2); // Initialize the vectors with specific values for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 
1); // Simple pattern - this->opt_in2[vec_e_at(i, k, 0)] = - (i + 1) * (k + 1) * 0.5; // Half of vec_e + vec_e_h[vec_e_at(i, k, 0)] = static_cast<TypeParam>((i + 1) * (k + 1)); // Simple pattern + opt_in2_h[vec_e_at(i, k, 0)] = static_cast<TypeParam>((i + 1) * (k + 1) * 0.5); // Half of vec_e } // Set edge indices to point to specific cells - this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; - this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; - this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + cell_edge_idx_h[cell_edge_at(i, 0, 0)] = i; + cell_edge_idx_h[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + cell_edge_idx_h[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; // Set neighbor indices similarly - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = i; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = (i + 1) % nproma; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = (i + 2) % nproma; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, 0)] = i; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, 1)] = (i + 1) % nproma; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, 2)] = (i + 2) % nproma; // All edges and neighbors are in the same block for this test for (int j = 0; j < 3; ++j) { - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + cell_edge_blk_h[cell_edge_at(i, 0, j)] = 0; + cell_neighbor_blk_h[cell_neighbor_at(i, 0, j)] = 0; } // Geometric factors - this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; - this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; - this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + geofac_div_h[geofac_div_at(i, 0, 0)] = static_cast<TypeParam>(0.5); + geofac_div_h[geofac_div_at(i, 1, 0)] = static_cast<TypeParam>(0.3); + geofac_div_h[geofac_div_at(i, 2, 0)] = static_cast<TypeParam>(0.2); // Average coefficients - this->avg_coeff[avg_coeff_at(i, 0, 0)] = 0.4; // Self - this->avg_coeff[avg_coeff_at(i, 1, 0)] = 0.2; // First neighbor - this->avg_coeff[avg_coeff_at(i, 2, 0)] = 0.2; 
// Second neighbor - this->avg_coeff[avg_coeff_at(i, 3, 0)] = 0.2; // Third neighbor + avg_coeff_h[avg_coeff_at(i, 0, 0)] = static_cast<TypeParam>(0.4); // Self + avg_coeff_h[avg_coeff_at(i, 1, 0)] = static_cast<TypeParam>(0.2); // First neighbor + avg_coeff_h[avg_coeff_at(i, 2, 0)] = static_cast<TypeParam>(0.2); // Second neighbor + avg_coeff_h[avg_coeff_at(i, 3, 0)] = static_cast<TypeParam>(0.2); // Third neighbor // Initialize div_vec_c and opt_out2 to zero for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; - this->opt_out2[div_vec_c_at(i, k, 0)] = 0.0; + div_vec_c_h[div_vec_c_at(i, k, 0)] = static_cast<TypeParam>(0.0); + opt_out2_h[div_vec_c_at(i, k, 0)] = static_cast<TypeParam>(0.0); } } + // Copy initialized data to device + Kokkos::deep_copy(this->vec_e, vec_e_h); + Kokkos::deep_copy(this->cell_neighbor_idx, cell_neighbor_idx_h); + Kokkos::deep_copy(this->cell_neighbor_blk, cell_neighbor_blk_h); + Kokkos::deep_copy(this->cell_edge_idx, cell_edge_idx_h); + Kokkos::deep_copy(this->cell_edge_blk, cell_edge_blk_h); + Kokkos::deep_copy(this->geofac_div, geofac_div_h); + Kokkos::deep_copy(this->avg_coeff, avg_coeff_h); + Kokkos::deep_copy(this->div_vec_c, div_vec_c_h); + Kokkos::deep_copy(this->opt_in2, opt_in2_h); + Kokkos::deep_copy(this->opt_out2, opt_out2_h); + // Call the div_avg function div_avg<TypeParam>( this->vec_e.data(), this->cell_neighbor_idx.data(), @@ -709,19 +730,25 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgSpecific) { this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, this->nblks_c, this->nblks_e); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.76, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.04, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.08, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.08, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.16, 1e-6); 
- - EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 0, 0)], 0.94, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 1, 0)], 1.88, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 0, 0)], 1.02, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 2.04, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 1.04, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 2.08, 1e-6); + // Copy results back to host for verification + Kokkos::deep_copy(div_vec_c_h, this->div_vec_c); + Kokkos::deep_copy(opt_out2_h, this->opt_out2); + + // Verify first field results + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(0, 0, 0)], static_cast<TypeParam>(1.88), 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(0, 1, 0)], static_cast<TypeParam>(3.76), 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(1, 0, 0)], static_cast<TypeParam>(2.04), 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(1, 1, 0)], static_cast<TypeParam>(4.08), 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(2, 0, 0)], static_cast<TypeParam>(2.08), 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(2, 1, 0)], static_cast<TypeParam>(4.16), 1e-6); + + // Verify second field results + EXPECT_NEAR(opt_out2_h[div_vec_c_at(0, 0, 0)], static_cast<TypeParam>(0.94), 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(0, 1, 0)], static_cast<TypeParam>(1.88), 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(1, 0, 0)], static_cast<TypeParam>(1.02), 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(1, 1, 0)], static_cast<TypeParam>(2.04), 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(2, 0, 0)], static_cast<TypeParam>(1.04), 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(2, 1, 0)], static_cast<TypeParam>(2.08), 1e-6); } TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { @@ -749,47 +776,69 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + // Create mirror views to store data on the host + auto vec_e_h = Kokkos::create_mirror_view(this->vec_e); + auto 
cell_neighbor_idx_h = Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto cell_neighbor_blk_h = Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto cell_edge_idx_h = Kokkos::create_mirror_view(this->cell_edge_idx); + auto cell_edge_blk_h = Kokkos::create_mirror_view(this->cell_edge_blk); + auto geofac_div_h = Kokkos::create_mirror_view(this->geofac_div); + auto avg_coeff_h = Kokkos::create_mirror_view(this->avg_coeff); + auto div_vec_c_h = Kokkos::create_mirror_view(this->div_vec_c); + auto opt_in2_h = Kokkos::create_mirror_view(this->opt_in2); + auto opt_out2_h = Kokkos::create_mirror_view(this->opt_out2); + // Set up random number generators std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + std::uniform_real_distribution<TypeParam> real_distrib(-1.0, 1.0); // Initialize with random values for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); - this->opt_in2[vec_e_at(i, k, 0)] = real_distrib(gen); + vec_e_h[vec_e_at(i, k, 0)] = real_distrib(gen); + opt_in2_h[vec_e_at(i, k, 0)] = real_distrib(gen); } // Set random edge indices for (int j = 0; j < 3; ++j) { - this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - this->cell_edge_blk[cell_edge_at(i, 0, j)] = - 0; // Keep in same block for simplicity + cell_edge_idx_h[cell_edge_at(i, 0, j)] = int_distrib(gen); + cell_edge_blk_h[cell_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = - 0; // Keep in same block for simplicity + cell_neighbor_idx_h[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + cell_neighbor_blk_h[cell_neighbor_at(i, 0, j)] = 0; // Keep in same block for simplicity } // Random geometric factors for (int j = 0; j < 3; ++j) { - 
this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + geofac_div_h[geofac_div_at(i, j, 0)] = real_distrib(gen); } // Random average coefficients for (int j = 0; j < 4; ++j) { - this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen); + avg_coeff_h[avg_coeff_at(i, j, 0)] = real_distrib(gen); } // Random initial values for div_vec_c and opt_out2 for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); - this->opt_out2[div_vec_c_at(i, k, 0)] = real_distrib(gen); + div_vec_c_h[div_vec_c_at(i, k, 0)] = static_cast<TypeParam>(0.0); + opt_out2_h[div_vec_c_at(i, k, 0)] = static_cast<TypeParam>(0.0); } } + // Copy initialized data to device + Kokkos::deep_copy(this->vec_e, vec_e_h); + Kokkos::deep_copy(this->cell_neighbor_idx, cell_neighbor_idx_h); + Kokkos::deep_copy(this->cell_neighbor_blk, cell_neighbor_blk_h); + Kokkos::deep_copy(this->cell_edge_idx, cell_edge_idx_h); + Kokkos::deep_copy(this->cell_edge_blk, cell_edge_blk_h); + Kokkos::deep_copy(this->geofac_div, geofac_div_h); + Kokkos::deep_copy(this->avg_coeff, avg_coeff_h); + Kokkos::deep_copy(this->div_vec_c, div_vec_c_h); + Kokkos::deep_copy(this->opt_in2, opt_in2_h); + Kokkos::deep_copy(this->opt_out2, opt_out2_h); + // Call the div_avg function div_avg<TypeParam>( this->vec_e.data(), this->cell_neighbor_idx.data(), @@ -801,6 +850,10 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, this->nblks_c, this->nblks_e); + // Copy results back to host for verification + Kokkos::deep_copy(div_vec_c_h, this->div_vec_c); + Kokkos::deep_copy(opt_out2_h, this->opt_out2); + // Calculate reference values manually std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c)); std::vector<TypeParam> aux_c2(dim_combine(nproma, nlev, nblks_c)); @@ -816,32 +869,32 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jc = i_startidx; jc < 
i_endidx; ++jc) { aux_c[div_vec_c_at(jc, jk, jb)] = - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; + vec_e_h[vec_e_at( + cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 0)])] * + geofac_div_h[geofac_div_at(jc, 0, jb)] + + vec_e_h[vec_e_at( + cell_edge_idx_h[cell_edge_at(jc, jb, 1)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 1)])] * + geofac_div_h[geofac_div_at(jc, 1, jb)] + + vec_e_h[vec_e_at( + cell_edge_idx_h[cell_edge_at(jc, jb, 2)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 2)])] * + geofac_div_h[geofac_div_at(jc, 2, jb)]; aux_c2[div_vec_c_at(jc, jk, jb)] = - this->opt_in2[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->opt_in2[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->opt_in2[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; + opt_in2_h[vec_e_at( + cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 0)])] * + geofac_div_h[geofac_div_at(jc, 0, jb)] + + opt_in2_h[vec_e_at( + cell_edge_idx_h[cell_edge_at(jc, jb, 1)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 1)])] * + geofac_div_h[geofac_div_at(jc, 1, jb)] + + opt_in2_h[vec_e_at( + cell_edge_idx_h[cell_edge_at(jc, jb, 2)], jk, + 
cell_edge_blk_h[cell_edge_at(jc, jb, 2)])] * + geofac_div_h[geofac_div_at(jc, 2, jb)]; } } } @@ -872,35 +925,35 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { for (int jc = i_startidx; jc < i_endidx; ++jc) { ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = aux_c[div_vec_c_at(jc, jk, jb)] * - this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + avg_coeff_h[avg_coeff_at(jc, 0, jb)] + aux_c[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * - this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + cell_neighbor_idx_h[cell_neighbor_at(jc, jb, 0)], jk, + cell_neighbor_blk_h[cell_neighbor_at(jc, jb, 0)])] * + avg_coeff_h[avg_coeff_at(jc, 1, jb)] + aux_c[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * - this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + cell_neighbor_idx_h[cell_neighbor_at(jc, jb, 1)], jk, + cell_neighbor_blk_h[cell_neighbor_at(jc, jb, 1)])] * + avg_coeff_h[avg_coeff_at(jc, 2, jb)] + aux_c[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * - this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + cell_neighbor_idx_h[cell_neighbor_at(jc, jb, 2)], jk, + cell_neighbor_blk_h[cell_neighbor_at(jc, jb, 2)])] * + avg_coeff_h[avg_coeff_at(jc, 3, jb)]; ref_opt_out2[div_vec_c_at(jc, jk, jb)] = aux_c2[div_vec_c_at(jc, jk, jb)] * - this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + avg_coeff_h[avg_coeff_at(jc, 0, jb)] + aux_c2[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * - this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + cell_neighbor_idx_h[cell_neighbor_at(jc, jb, 0)], jk, + cell_neighbor_blk_h[cell_neighbor_at(jc, jb, 0)])] * + avg_coeff_h[avg_coeff_at(jc, 1, jb)] + aux_c2[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, - 
this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * - this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + cell_neighbor_idx_h[cell_neighbor_at(jc, jb, 1)], jk, + cell_neighbor_blk_h[cell_neighbor_at(jc, jb, 1)])] * + avg_coeff_h[avg_coeff_at(jc, 2, jb)] + aux_c2[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * - this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + cell_neighbor_idx_h[cell_neighbor_at(jc, jb, 2)], jk, + cell_neighbor_blk_h[cell_neighbor_at(jc, jb, 2)])] * + avg_coeff_h[avg_coeff_at(jc, 3, jb)]; } } } @@ -908,11 +961,11 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { // Verify results for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(i, k, 0)], ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) << "div_vec_c results differ at i=" << i << ", k=" << k; - EXPECT_NEAR(this->opt_out2[div_vec_c_at(i, k, 0)], + EXPECT_NEAR(opt_out2_h[div_vec_c_at(i, k, 0)], ref_opt_out2[div_vec_c_at(i, k, 0)], 1e-5) << "opt_out2 results differ at i=" << i << ", k=" << k; } @@ -945,48 +998,71 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgSpecificNoL2fields) { const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + // Create mirror views to store data on the host + auto vec_e_h = Kokkos::create_mirror_view(this->vec_e); + auto cell_neighbor_idx_h = Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto cell_neighbor_blk_h = Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto cell_edge_idx_h = Kokkos::create_mirror_view(this->cell_edge_idx); + auto cell_edge_blk_h = Kokkos::create_mirror_view(this->cell_edge_blk); + auto geofac_div_h = Kokkos::create_mirror_view(this->geofac_div); + auto avg_coeff_h = Kokkos::create_mirror_view(this->avg_coeff); + auto div_vec_c_h = Kokkos::create_mirror_view(this->div_vec_c); + auto opt_in2_h = 
Kokkos::create_mirror_view(this->opt_in2); + auto opt_out2_h = Kokkos::create_mirror_view(this->opt_out2); + // Initialize the vectors with specific values for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern - this->opt_in2[vec_e_at(i, k, 0)] = - (i + 1) * (k + 1) * 0.5; // Half of vec_e + vec_e_h[vec_e_at(i, k, 0)] = static_cast<TypeParam>((i + 1) * (k + 1)); // Simple pattern + opt_in2_h[vec_e_at(i, k, 0)] = static_cast<TypeParam>((i + 1) * (k + 1) * 0.5); // Half of vec_e } // Set edge indices to point to specific cells - this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; - this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; - this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + cell_edge_idx_h[cell_edge_at(i, 0, 0)] = i; + cell_edge_idx_h[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + cell_edge_idx_h[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; // Set neighbor indices similarly - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = i; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = (i + 1) % nproma; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = (i + 2) % nproma; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, 0)] = i; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, 1)] = (i + 1) % nproma; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, 2)] = (i + 2) % nproma; // All edges and neighbors are in the same block for this test for (int j = 0; j < 3; ++j) { - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + cell_edge_blk_h[cell_edge_at(i, 0, j)] = 0; + cell_neighbor_blk_h[cell_neighbor_at(i, 0, j)] = 0; } // Geometric factors - this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; - this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; - this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + geofac_div_h[geofac_div_at(i, 0, 0)] = static_cast<TypeParam>(0.5); + geofac_div_h[geofac_div_at(i, 1, 0)] = 
static_cast<TypeParam>(0.3); + geofac_div_h[geofac_div_at(i, 2, 0)] = static_cast<TypeParam>(0.2); // Average coefficients - this->avg_coeff[avg_coeff_at(i, 0, 0)] = 0.4; // Self - this->avg_coeff[avg_coeff_at(i, 1, 0)] = 0.2; // First neighbor - this->avg_coeff[avg_coeff_at(i, 2, 0)] = 0.2; // Second neighbor - this->avg_coeff[avg_coeff_at(i, 3, 0)] = 0.2; // Third neighbor + avg_coeff_h[avg_coeff_at(i, 0, 0)] = static_cast<TypeParam>(0.4); // Self + avg_coeff_h[avg_coeff_at(i, 1, 0)] = static_cast<TypeParam>(0.2); // First neighbor + avg_coeff_h[avg_coeff_at(i, 2, 0)] = static_cast<TypeParam>(0.2); // Second neighbor + avg_coeff_h[avg_coeff_at(i, 3, 0)] = static_cast<TypeParam>(0.2); // Third neighbor // Initialize div_vec_c and opt_out2 to zero for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; - this->opt_out2[div_vec_c_at(i, k, 0)] = 0.0; + div_vec_c_h[div_vec_c_at(i, k, 0)] = static_cast<TypeParam>(0.0); + opt_out2_h[div_vec_c_at(i, k, 0)] = static_cast<TypeParam>(0.0); } } + // Copy initialized data to device + Kokkos::deep_copy(this->vec_e, vec_e_h); + Kokkos::deep_copy(this->cell_neighbor_idx, cell_neighbor_idx_h); + Kokkos::deep_copy(this->cell_neighbor_blk, cell_neighbor_blk_h); + Kokkos::deep_copy(this->cell_edge_idx, cell_edge_idx_h); + Kokkos::deep_copy(this->cell_edge_blk, cell_edge_blk_h); + Kokkos::deep_copy(this->geofac_div, geofac_div_h); + Kokkos::deep_copy(this->avg_coeff, avg_coeff_h); + Kokkos::deep_copy(this->div_vec_c, div_vec_c_h); + Kokkos::deep_copy(this->opt_in2, opt_in2_h); + Kokkos::deep_copy(this->opt_out2, opt_out2_h); + // Call the div_avg function div_avg<TypeParam>( this->vec_e.data(), this->cell_neighbor_idx.data(), @@ -998,19 +1074,25 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgSpecificNoL2fields) { this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, this->nblks_c, this->nblks_e); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6); - 
EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.76, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.04, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.08, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.08, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.16, 1e-6); - - EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 0, 0)], 0.0, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 1, 0)], 0.0, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 0, 0)], 0.0, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 0.0, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 0.0, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 0.0, 1e-6); + // Copy results back to host for verification + Kokkos::deep_copy(div_vec_c_h, this->div_vec_c); + Kokkos::deep_copy(opt_out2_h, this->opt_out2); + + // Verify first field results + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(0, 0, 0)], static_cast<TypeParam>(1.88), 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(0, 1, 0)], static_cast<TypeParam>(3.76), 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(1, 0, 0)], static_cast<TypeParam>(2.04), 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(1, 1, 0)], static_cast<TypeParam>(4.08), 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(2, 0, 0)], static_cast<TypeParam>(2.08), 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(2, 1, 0)], static_cast<TypeParam>(4.16), 1e-6); + + // Since l2fields=false, opt_out2 should not be modified + EXPECT_NEAR(opt_out2_h[div_vec_c_at(0, 0, 0)], static_cast<TypeParam>(0.0), 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(0, 1, 0)], static_cast<TypeParam>(0.0), 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(1, 0, 0)], static_cast<TypeParam>(0.0), 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(1, 1, 0)], static_cast<TypeParam>(0.0), 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(2, 0, 0)], static_cast<TypeParam>(0.0), 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(2, 1, 0)], static_cast<TypeParam>(0.0), 1e-6); 
} TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { @@ -1038,49 +1120,69 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + // Create mirror views to store data on the host + auto vec_e_h = Kokkos::create_mirror_view(this->vec_e); + auto cell_neighbor_idx_h = Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto cell_neighbor_blk_h = Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto cell_edge_idx_h = Kokkos::create_mirror_view(this->cell_edge_idx); + auto cell_edge_blk_h = Kokkos::create_mirror_view(this->cell_edge_blk); + auto geofac_div_h = Kokkos::create_mirror_view(this->geofac_div); + auto avg_coeff_h = Kokkos::create_mirror_view(this->avg_coeff); + auto div_vec_c_h = Kokkos::create_mirror_view(this->div_vec_c); + auto opt_in2_h = Kokkos::create_mirror_view(this->opt_in2); + auto opt_out2_h = Kokkos::create_mirror_view(this->opt_out2); + // Set up random number generators std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + std::uniform_real_distribution<TypeParam> real_distrib(-1.0, 1.0); // Initialize with random values for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); - this->opt_in2[vec_e_at(i, k, 0)] = - real_distrib(gen); // Not used but initialize anyway + vec_e_h[vec_e_at(i, k, 0)] = real_distrib(gen); + opt_in2_h[vec_e_at(i, k, 0)] = real_distrib(gen); // Not used but initialize anyway } // Set random edge indices for (int j = 0; j < 3; ++j) { - this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - this->cell_edge_blk[cell_edge_at(i, 0, j)] = - 0; // Keep in same block for simplicity + cell_edge_idx_h[cell_edge_at(i, 0, j)] = int_distrib(gen); + cell_edge_blk_h[cell_edge_at(i, 0, j)] = 0; // Keep in 
same block for simplicity - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = - 0; // Keep in same block for simplicity + cell_neighbor_idx_h[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + cell_neighbor_blk_h[cell_neighbor_at(i, 0, j)] = 0; // Keep in same block for simplicity } // Random geometric factors for (int j = 0; j < 3; ++j) { - this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + geofac_div_h[geofac_div_at(i, j, 0)] = real_distrib(gen); } // Random average coefficients for (int j = 0; j < 4; ++j) { - this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen); + avg_coeff_h[avg_coeff_at(i, j, 0)] = real_distrib(gen); } // Random initial values for div_vec_c and opt_out2 for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); - this->opt_out2[div_vec_c_at(i, k, 0)] = - real_distrib(gen); // Not used but initialize anyway + div_vec_c_h[div_vec_c_at(i, k, 0)] = static_cast<TypeParam>(0.0); + opt_out2_h[div_vec_c_at(i, k, 0)] = static_cast<TypeParam>(0.0); // Not used but initialize anyway } } + // Copy initialized data to device + Kokkos::deep_copy(this->vec_e, vec_e_h); + Kokkos::deep_copy(this->cell_neighbor_idx, cell_neighbor_idx_h); + Kokkos::deep_copy(this->cell_neighbor_blk, cell_neighbor_blk_h); + Kokkos::deep_copy(this->cell_edge_idx, cell_edge_idx_h); + Kokkos::deep_copy(this->cell_edge_blk, cell_edge_blk_h); + Kokkos::deep_copy(this->geofac_div, geofac_div_h); + Kokkos::deep_copy(this->avg_coeff, avg_coeff_h); + Kokkos::deep_copy(this->div_vec_c, div_vec_c_h); + Kokkos::deep_copy(this->opt_in2, opt_in2_h); + Kokkos::deep_copy(this->opt_out2, opt_out2_h); + // Call the div_avg function with l2fields=false div_avg<TypeParam>( this->vec_e.data(), this->cell_neighbor_idx.data(), @@ -1092,6 +1194,10 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { this->nproma, patch_id, l_limited_area, l2fields, this->lacc, 
this->nlev, this->nblks_c, this->nblks_e); + // Copy results back to host for verification + Kokkos::deep_copy(div_vec_c_h, this->div_vec_c); + Kokkos::deep_copy(opt_out2_h, this->opt_out2); + // Calculate reference values manually std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c)); std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, nblks_c)); @@ -1105,18 +1211,18 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { aux_c[div_vec_c_at(jc, jk, jb)] = - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; + vec_e_h[vec_e_at( + cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 0)])] * + geofac_div_h[geofac_div_at(jc, 0, jb)] + + vec_e_h[vec_e_at( + cell_edge_idx_h[cell_edge_at(jc, jb, 1)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 1)])] * + geofac_div_h[geofac_div_at(jc, 1, jb)] + + vec_e_h[vec_e_at( + cell_edge_idx_h[cell_edge_at(jc, jb, 2)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 2)])] * + geofac_div_h[geofac_div_at(jc, 2, jb)]; } } } @@ -1147,19 +1253,19 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { for (int jc = i_startidx; jc < i_endidx; ++jc) { ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = aux_c[div_vec_c_at(jc, jk, jb)] * - this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + avg_coeff_h[avg_coeff_at(jc, 0, jb)] + aux_c[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, - 
this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * - this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + cell_neighbor_idx_h[cell_neighbor_at(jc, jb, 0)], jk, + cell_neighbor_blk_h[cell_neighbor_at(jc, jb, 0)])] * + avg_coeff_h[avg_coeff_at(jc, 1, jb)] + aux_c[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * - this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + cell_neighbor_idx_h[cell_neighbor_at(jc, jb, 1)], jk, + cell_neighbor_blk_h[cell_neighbor_at(jc, jb, 1)])] * + avg_coeff_h[avg_coeff_at(jc, 2, jb)] + aux_c[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * - this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + cell_neighbor_idx_h[cell_neighbor_at(jc, jb, 2)], jk, + cell_neighbor_blk_h[cell_neighbor_at(jc, jb, 2)])] * + avg_coeff_h[avg_coeff_at(jc, 3, jb)]; } } } @@ -1168,10 +1274,9 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { // isn't updated for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(i, k, 0)], ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) << "div_vec_c results differ at i=" << i << ", k=" << k; } } } -*/ -- GitLab From b43bd02f503163939a1e0c2fac085dc37a00b249 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Thu, 27 Mar 2025 11:57:42 +0100 Subject: [PATCH 06/34] modified the tests in horizontal_recon --- src/horizontal/mo_lib_divrot.cpp | 714 ++++++++++----------- test/c/CMakeLists.txt | 2 +- test/c/test_horizontal_recon.cpp | 1025 ++++++++++++++++++++---------- 3 files changed, 1042 insertions(+), 699 deletions(-) diff --git a/src/horizontal/mo_lib_divrot.cpp b/src/horizontal/mo_lib_divrot.cpp index 67d13fd..d460211 100644 --- a/src/horizontal/mo_lib_divrot.cpp +++ b/src/horizontal/mo_lib_divrot.cpp @@ -36,9 +36,6 @@ void recon_lsq_cell_l(const T *p_cc, 
const int *cell_neighbor_idx, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - Kokkos::View<T *> z_d("z_d", lsq_dim_c); - Kokkos::View<T *> z_qt_times_d("z_qt_times_d", lsq_dim_unk); - UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, lsq_dim_c); @@ -64,25 +61,28 @@ void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, Kokkos::parallel_for( "recon_lsq_cell_l_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - z_d(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + T z_d[3]; // Local array instead of shared View + T z_qt_times_d[2]; + + z_d[0] = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - p_cc_view(jc, jk, jb); - z_d(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + z_d[1] = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - p_cc_view(jc, jk, jb); - z_d(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + z_d[2] = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - p_cc_view(jc, jk, jb); // matrix multiplication Q^T d (partitioned into 2 dot products) - z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0) + - lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1) + - lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2); - z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0) + - lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1) + - lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2); + z_qt_times_d[0] = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d[0] + + lsq_qtmat_c_view(jc, 0, 1, jb) * z_d[1] + + lsq_qtmat_c_view(jc, 0, 2, jb) * z_d[2]; + z_qt_times_d[1] = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d[0] + + lsq_qtmat_c_view(jc, 1, 1, jb) * z_d[1] + + lsq_qtmat_c_view(jc, 1, 2, jb) * z_d[2]; p_coeff_view(2, jc, jk, jb) = - lsq_rmat_rdiag_c_view(jc, 1, jb) * z_qt_times_d(1); + lsq_rmat_rdiag_c_view(jc, 1, jb) * z_qt_times_d[1]; p_coeff_view(1, jc, jk, jb) = lsq_rmat_rdiag_c_view(jc, 0, jb) * - (z_qt_times_d(0) - + (z_qt_times_d[0] - lsq_rmat_utri_c_view(jc, 0, jb) * p_coeff_view(2, jc, jk, jb)); 
p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb); }); @@ -124,8 +124,6 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - Kokkos::View<T *> z_b("z_b", lsq_dim_c); - UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, lsq_dim_c); @@ -146,21 +144,22 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, Kokkos::parallel_for( "recon_lsq_cell_l_svd_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - z_b(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + T z_b[3]; // Local array instead of shared View + z_b[0] = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - p_cc_view(jc, jk, jb); - z_b(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + z_b[1] = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - p_cc_view(jc, jk, jb); - z_b(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + z_b[2] = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - p_cc_view(jc, jk, jb); p_coeff_view(2, jc, jk, jb) = - lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2); + lsq_pseudoinv_view(jc, 1, 0, jb) * z_b[0] + + lsq_pseudoinv_view(jc, 1, 1, jb) * z_b[1] + + lsq_pseudoinv_view(jc, 1, 2, jb) * z_b[2]; p_coeff_view(1, jc, jk, jb) = - lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2); + lsq_pseudoinv_view(jc, 0, 0, jb) * z_b[0] + + lsq_pseudoinv_view(jc, 0, 1, jb) * z_b[1] + + lsq_pseudoinv_view(jc, 0, 2, jb) * z_b[2]; p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb); }); if (l_consv) { @@ -201,8 +200,8 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - Kokkos::View<T ***> z_d("z_d", lsq_dim_c, nproma, nlev); - Kokkos::View<T *> 
z_qt_times_d("z_qt_times_d", lsq_dim_unk); + // Kokkos::View<T ***> z_d("z_d", lsq_dim_c, nproma, nlev); + // Kokkos::View<T *> z_qt_times_d("z_qt_times_d", lsq_dim_unk); UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); @@ -239,93 +238,95 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::parallel_for( "recon_lsq_cell_q_step1", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - z_d(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - - p_cc_view(jc, jk, jb); - z_d(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - - p_cc_view(jc, jk, jb); - z_d(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - - p_cc_view(jc, jk, jb); - z_d(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - - p_cc_view(jc, jk, jb); - z_d(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - - p_cc_view(jc, jk, jb); - z_d(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - - p_cc_view(jc, jk, jb); - z_d(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - - p_cc_view(jc, jk, jb); - z_d(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - - p_cc_view(jc, jk, jb); - z_d(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - + T z_d[9]; // Local array instead of shared View + T z_qt_times_d[5]; + z_d[0] = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); + z_d[1] = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_d[2] = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + z_d[3] = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + p_cc_view(jc, jk, jb); + z_d[4] = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - + p_cc_view(jc, jk, jb); + z_d[5] = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - + p_cc_view(jc, jk, jb); + z_d[6] = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - + p_cc_view(jc, jk, 
jb); + z_d[7] = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - + p_cc_view(jc, jk, jb); + z_d[8] = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - p_cc_view(jc, jk, jb); - }); - Kokkos::parallel_for( - "recon_lsq_cell_q_step2", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 3, 
4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk); - - p_coeff_view(5, jc, jk, jb) = ptr_rrdiag(jc, 4, jb) * z_qt_times_d(4); + // }); + // Kokkos::parallel_for( + // "recon_lsq_cell_q_step2", innerPolicy, + // KOKKOS_LAMBDA(const int jk, const int jc) { + z_qt_times_d[0] = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d[0] + + lsq_qtmat_c_view(jc, 0, 1, jb) * z_d[1] + + lsq_qtmat_c_view(jc, 0, 2, jb) * z_d[2] + + lsq_qtmat_c_view(jc, 0, 3, jb) * z_d[3] + + lsq_qtmat_c_view(jc, 0, 4, jb) * z_d[4] + + lsq_qtmat_c_view(jc, 0, 5, jb) * z_d[5] + + lsq_qtmat_c_view(jc, 0, 6, jb) * z_d[6] + + lsq_qtmat_c_view(jc, 0, 7, jb) * z_d[7] + + lsq_qtmat_c_view(jc, 0, 8, jb) * z_d[8]; + z_qt_times_d[1] = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d[0] + + lsq_qtmat_c_view(jc, 1, 1, jb) * z_d[1] + + lsq_qtmat_c_view(jc, 1, 2, jb) * z_d[2] + + lsq_qtmat_c_view(jc, 1, 3, jb) * z_d[3] + + lsq_qtmat_c_view(jc, 1, 4, jb) * z_d[4] + + lsq_qtmat_c_view(jc, 1, 5, jb) * z_d[5] + + lsq_qtmat_c_view(jc, 1, 6, jb) * z_d[6] + + lsq_qtmat_c_view(jc, 1, 7, jb) * z_d[7] + + lsq_qtmat_c_view(jc, 1, 8, jb) * z_d[8]; + z_qt_times_d[2] = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d[0] + + lsq_qtmat_c_view(jc, 2, 1, jb) * z_d[1] + + lsq_qtmat_c_view(jc, 2, 2, jb) * z_d[2] + + lsq_qtmat_c_view(jc, 2, 3, jb) * z_d[3] + + lsq_qtmat_c_view(jc, 2, 4, jb) * z_d[4] + + 
lsq_qtmat_c_view(jc, 2, 5, jb) * z_d[5] + + lsq_qtmat_c_view(jc, 2, 6, jb) * z_d[6] + + lsq_qtmat_c_view(jc, 2, 7, jb) * z_d[7] + + lsq_qtmat_c_view(jc, 2, 8, jb) * z_d[8]; + z_qt_times_d[3] = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d[0] + + lsq_qtmat_c_view(jc, 3, 1, jb) * z_d[1] + + lsq_qtmat_c_view(jc, 3, 2, jb) * z_d[2] + + lsq_qtmat_c_view(jc, 3, 3, jb) * z_d[3] + + lsq_qtmat_c_view(jc, 3, 4, jb) * z_d[4] + + lsq_qtmat_c_view(jc, 3, 5, jb) * z_d[5] + + lsq_qtmat_c_view(jc, 3, 6, jb) * z_d[6] + + lsq_qtmat_c_view(jc, 3, 7, jb) * z_d[7] + + lsq_qtmat_c_view(jc, 3, 8, jb) * z_d[8]; + z_qt_times_d[4] = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d[0] + + lsq_qtmat_c_view(jc, 4, 1, jb) * z_d[1] + + lsq_qtmat_c_view(jc, 4, 2, jb) * z_d[2] + + lsq_qtmat_c_view(jc, 4, 3, jb) * z_d[3] + + lsq_qtmat_c_view(jc, 4, 4, jb) * z_d[4] + + lsq_qtmat_c_view(jc, 4, 5, jb) * z_d[5] + + lsq_qtmat_c_view(jc, 4, 6, jb) * z_d[6] + + lsq_qtmat_c_view(jc, 4, 7, jb) * z_d[7] + + lsq_qtmat_c_view(jc, 4, 8, jb) * z_d[8]; + + p_coeff_view(5, jc, jk, jb) = ptr_rrdiag(jc, 4, jb) * z_qt_times_d[4]; p_coeff_view(4, jc, jk, jb) = ptr_rrdiag(jc, 3, jb) * - (z_qt_times_d(3) - + (z_qt_times_d[3] - ptr_rutri(jc, 0, jb) * p_coeff_view(5, jc, jk, jb)); p_coeff_view(3, jc, jk, jb) = ptr_rrdiag(jc, 2, jb) * - (z_qt_times_d(2) - + (z_qt_times_d[2] - ptr_rutri(jc, 1, jb) * p_coeff_view(4, jc, jk, jb) - ptr_rutri(jc, 2, jb) * p_coeff_view(5, jc, jk, jb)); p_coeff_view(2, jc, jk, jb) = ptr_rrdiag(jc, 1, jb) * - (z_qt_times_d(1) - + (z_qt_times_d[1] - ptr_rutri(jc, 3, jb) * p_coeff_view(3, jc, jk, jb) - ptr_rutri(jc, 4, jb) * p_coeff_view(4, jc, jk, jb) - ptr_rutri(jc, 5, jb) * p_coeff_view(5, jc, jk, jb)); p_coeff_view(1, jc, jk, jb) = ptr_rrdiag(jc, 0, jb) * - (z_qt_times_d(0) - + (z_qt_times_d[0] - ptr_rutri(jc, 6, jb) * p_coeff_view(2, jc, jk, jb) - ptr_rutri(jc, 7, jb) * p_coeff_view(3, jc, jk, jb) - ptr_rutri(jc, 8, jb) * p_coeff_view(4, jc, jk, jb) - @@ -398,78 +399,79 @@ void recon_lsq_cell_q_svd(const T *p_cc, 
const int *lsq_idx_c, Kokkos::parallel_for( "recon_lsq_cell_q_svd_step1", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - z_b(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + T z_b[9]; // Local array instead of shared View + z_b[0] = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - p_cc_view(jc, jk, jb); - z_b(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + z_b[1] = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - p_cc_view(jc, jk, jb); - z_b(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + z_b[2] = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - p_cc_view(jc, jk, jb); - z_b(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + z_b[3] = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - p_cc_view(jc, jk, jb); - z_b(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - + z_b[4] = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - p_cc_view(jc, jk, jb); - z_b(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - + z_b[5] = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - p_cc_view(jc, jk, jb); - z_b(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - + z_b[6] = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - p_cc_view(jc, jk, jb); - z_b(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - + z_b[7] = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - p_cc_view(jc, jk, jb); - z_b(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - + z_b[8] = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - p_cc_view(jc, jk, jb); - }); - Kokkos::parallel_for( - "recon_lsq_cell_q_svd_step2", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { + // }); + // Kokkos::parallel_for( + // "recon_lsq_cell_q_svd_step2", innerPolicy, + // KOKKOS_LAMBDA(const int jk, const int jc) { p_coeff_view(5, jc, jk, jb) = - lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2, jc, 
jk) + - lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8, jc, jk); + lsq_pseudoinv_view(jc, 4, 0, jb) * z_b[0] + + lsq_pseudoinv_view(jc, 4, 1, jb) * z_b[1] + + lsq_pseudoinv_view(jc, 4, 2, jb) * z_b[2] + + lsq_pseudoinv_view(jc, 4, 3, jb) * z_b[3] + + lsq_pseudoinv_view(jc, 4, 4, jb) * z_b[4] + + lsq_pseudoinv_view(jc, 4, 5, jb) * z_b[5] + + lsq_pseudoinv_view(jc, 4, 6, jb) * z_b[6] + + lsq_pseudoinv_view(jc, 4, 7, jb) * z_b[7] + + lsq_pseudoinv_view(jc, 4, 8, jb) * z_b[8]; p_coeff_view(4, jc, jk, jb) = - lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 3, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8, jc, jk); + lsq_pseudoinv_view(jc, 3, 0, jb) * z_b[0] + + lsq_pseudoinv_view(jc, 3, 1, jb) * z_b[1] + + lsq_pseudoinv_view(jc, 3, 2, jb) * z_b[2] + + lsq_pseudoinv_view(jc, 3, 3, jb) * z_b[3] + + lsq_pseudoinv_view(jc, 3, 4, jb) * z_b[4] + + lsq_pseudoinv_view(jc, 3, 5, jb) * z_b[5] + + lsq_pseudoinv_view(jc, 3, 6, jb) * z_b[6] + + lsq_pseudoinv_view(jc, 3, 7, jb) * z_b[7] + + lsq_pseudoinv_view(jc, 3, 8, jb) * z_b[8]; p_coeff_view(3, jc, jk, jb) = - lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 2, 
5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8, jc, jk); + lsq_pseudoinv_view(jc, 2, 0, jb) * z_b[0] + + lsq_pseudoinv_view(jc, 2, 1, jb) * z_b[1] + + lsq_pseudoinv_view(jc, 2, 2, jb) * z_b[2] + + lsq_pseudoinv_view(jc, 2, 3, jb) * z_b[3] + + lsq_pseudoinv_view(jc, 2, 4, jb) * z_b[4] + + lsq_pseudoinv_view(jc, 2, 5, jb) * z_b[5] + + lsq_pseudoinv_view(jc, 2, 6, jb) * z_b[6] + + lsq_pseudoinv_view(jc, 2, 7, jb) * z_b[7] + + lsq_pseudoinv_view(jc, 2, 8, jb) * z_b[8]; p_coeff_view(2, jc, jk, jb) = - lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 1, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 1, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8, jc, jk); + lsq_pseudoinv_view(jc, 1, 0, jb) * z_b[0] + + lsq_pseudoinv_view(jc, 1, 1, jb) * z_b[1] + + lsq_pseudoinv_view(jc, 1, 2, jb) * z_b[2] + + lsq_pseudoinv_view(jc, 1, 3, jb) * z_b[3] + + lsq_pseudoinv_view(jc, 1, 4, jb) * z_b[4] + + lsq_pseudoinv_view(jc, 1, 5, jb) * z_b[5] + + lsq_pseudoinv_view(jc, 1, 6, jb) * z_b[6] + + lsq_pseudoinv_view(jc, 1, 7, jb) * z_b[7] + + lsq_pseudoinv_view(jc, 1, 8, jb) * z_b[8]; p_coeff_view(1, jc, jk, jb) = - lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7, jc, jk) + - 
lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8, jc, jk); + lsq_pseudoinv_view(jc, 0, 0, jb) * z_b[0] + + lsq_pseudoinv_view(jc, 0, 1, jb) * z_b[1] + + lsq_pseudoinv_view(jc, 0, 2, jb) * z_b[2] + + lsq_pseudoinv_view(jc, 0, 3, jb) * z_b[3] + + lsq_pseudoinv_view(jc, 0, 4, jb) * z_b[4] + + lsq_pseudoinv_view(jc, 0, 5, jb) * z_b[5] + + lsq_pseudoinv_view(jc, 0, 6, jb) * z_b[6] + + lsq_pseudoinv_view(jc, 0, 7, jb) * z_b[7] + + lsq_pseudoinv_view(jc, 0, 8, jb) * z_b[8]; p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb) - p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - @@ -505,8 +507,8 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - Kokkos::View<T ***> z_d("z_d", lsq_dim_c, nproma, elev); - Kokkos::View<T *> z_qt_times_d("z_qt_times_d", 9); + // Kokkos::View<T ***> z_d("z_d", lsq_dim_c, nproma, elev); + // Kokkos::View<T *> z_qt_times_d("z_qt_times_d", 9); UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); @@ -543,136 +545,139 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::parallel_for( "recon_lsq_cell_c_step1", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - z_d(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + T z_d[9]; // Local array instead of shared View + T z_qt_times_d[9]; // Local array instead of shared View + + z_d[0] = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - p_cc_view(jc, jk, jb); - z_d(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + z_d[1] = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - p_cc_view(jc, jk, jb); - z_d(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + z_d[2] = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - p_cc_view(jc, jk, jb); - z_d(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + z_d[3] = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - 
p_cc_view(jc, jk, jb); - z_d(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - + z_d[4] = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - p_cc_view(jc, jk, jb); - z_d(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - + z_d[5] = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - p_cc_view(jc, jk, jb); - z_d(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - + z_d[6] = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - p_cc_view(jc, jk, jb); - z_d(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - + z_d[7] = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - p_cc_view(jc, jk, jb); - z_d(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - + z_d[8] = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - p_cc_view(jc, jk, jb); - }); - Kokkos::parallel_for( - "recon_lsq_cell_c_step2", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jc) { - z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) + - 
lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(5) = lsq_qtmat_c_view(jc, 5, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 5, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 5, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 5, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 5, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 5, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 5, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 5, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 5, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(6) = lsq_qtmat_c_view(jc, 6, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 6, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 6, 2, jb) * z_d(2, jc, jk) + - 
lsq_qtmat_c_view(jc, 6, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 6, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 6, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 6, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 6, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 6, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(7) = lsq_qtmat_c_view(jc, 7, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 7, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 7, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 7, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 7, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 7, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 7, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 7, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 7, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(8) = lsq_qtmat_c_view(jc, 8, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 8, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 8, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 8, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 8, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 8, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 8, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 8, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 8, 8, jb) * z_d(8, jc, jk); - - p_coeff_view(9, jc, jk, jb) = ptr_rrdiag(jc, 8, jb) * z_qt_times_d(8); + // }); + // Kokkos::parallel_for( + // "recon_lsq_cell_c_step2", innerPolicy, + // KOKKOS_LAMBDA(const int jk, const int jc) { + z_qt_times_d[0] = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d[0] + + lsq_qtmat_c_view(jc, 0, 1, jb) * z_d[1] + + lsq_qtmat_c_view(jc, 0, 2, jb) * z_d[2] + + lsq_qtmat_c_view(jc, 0, 3, jb) * z_d[3] + + lsq_qtmat_c_view(jc, 0, 4, jb) * z_d[4] + + lsq_qtmat_c_view(jc, 0, 5, jb) * z_d[5] + + lsq_qtmat_c_view(jc, 0, 6, jb) * z_d[6] + + lsq_qtmat_c_view(jc, 0, 7, jb) * z_d[7] + + lsq_qtmat_c_view(jc, 0, 8, jb) * z_d[8]; + z_qt_times_d[1] = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d[0] + + lsq_qtmat_c_view(jc, 1, 1, jb) * z_d[1] 
+ + lsq_qtmat_c_view(jc, 1, 2, jb) * z_d[2] + + lsq_qtmat_c_view(jc, 1, 3, jb) * z_d[3] + + lsq_qtmat_c_view(jc, 1, 4, jb) * z_d[4] + + lsq_qtmat_c_view(jc, 1, 5, jb) * z_d[5] + + lsq_qtmat_c_view(jc, 1, 6, jb) * z_d[6] + + lsq_qtmat_c_view(jc, 1, 7, jb) * z_d[7] + + lsq_qtmat_c_view(jc, 1, 8, jb) * z_d[8]; + z_qt_times_d[2] = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d[0] + + lsq_qtmat_c_view(jc, 2, 1, jb) * z_d[1] + + lsq_qtmat_c_view(jc, 2, 2, jb) * z_d[2] + + lsq_qtmat_c_view(jc, 2, 3, jb) * z_d[3] + + lsq_qtmat_c_view(jc, 2, 4, jb) * z_d[4] + + lsq_qtmat_c_view(jc, 2, 5, jb) * z_d[5] + + lsq_qtmat_c_view(jc, 2, 6, jb) * z_d[6] + + lsq_qtmat_c_view(jc, 2, 7, jb) * z_d[7] + + lsq_qtmat_c_view(jc, 2, 8, jb) * z_d[8]; + z_qt_times_d[3] = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d[0] + + lsq_qtmat_c_view(jc, 3, 1, jb) * z_d[1] + + lsq_qtmat_c_view(jc, 3, 2, jb) * z_d[2] + + lsq_qtmat_c_view(jc, 3, 3, jb) * z_d[3] + + lsq_qtmat_c_view(jc, 3, 4, jb) * z_d[4] + + lsq_qtmat_c_view(jc, 3, 5, jb) * z_d[5] + + lsq_qtmat_c_view(jc, 3, 6, jb) * z_d[6] + + lsq_qtmat_c_view(jc, 3, 7, jb) * z_d[7] + + lsq_qtmat_c_view(jc, 3, 8, jb) * z_d[8]; + z_qt_times_d[4] = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d[0] + + lsq_qtmat_c_view(jc, 4, 1, jb) * z_d[1] + + lsq_qtmat_c_view(jc, 4, 2, jb) * z_d[2] + + lsq_qtmat_c_view(jc, 4, 3, jb) * z_d[3] + + lsq_qtmat_c_view(jc, 4, 4, jb) * z_d[4] + + lsq_qtmat_c_view(jc, 4, 5, jb) * z_d[5] + + lsq_qtmat_c_view(jc, 4, 6, jb) * z_d[6] + + lsq_qtmat_c_view(jc, 4, 7, jb) * z_d[7] + + lsq_qtmat_c_view(jc, 4, 8, jb) * z_d[8]; + z_qt_times_d[5] = lsq_qtmat_c_view(jc, 5, 0, jb) * z_d[0] + + lsq_qtmat_c_view(jc, 5, 1, jb) * z_d[1] + + lsq_qtmat_c_view(jc, 5, 2, jb) * z_d[2] + + lsq_qtmat_c_view(jc, 5, 3, jb) * z_d[3] + + lsq_qtmat_c_view(jc, 5, 4, jb) * z_d[4] + + lsq_qtmat_c_view(jc, 5, 5, jb) * z_d[5] + + lsq_qtmat_c_view(jc, 5, 6, jb) * z_d[6] + + lsq_qtmat_c_view(jc, 5, 7, jb) * z_d[7] + + lsq_qtmat_c_view(jc, 5, 8, jb) * z_d[8]; + z_qt_times_d[6] = 
lsq_qtmat_c_view(jc, 6, 0, jb) * z_d[0] + + lsq_qtmat_c_view(jc, 6, 1, jb) * z_d[1] + + lsq_qtmat_c_view(jc, 6, 2, jb) * z_d[2] + + lsq_qtmat_c_view(jc, 6, 3, jb) * z_d[3] + + lsq_qtmat_c_view(jc, 6, 4, jb) * z_d[4] + + lsq_qtmat_c_view(jc, 6, 5, jb) * z_d[5] + + lsq_qtmat_c_view(jc, 6, 6, jb) * z_d[6] + + lsq_qtmat_c_view(jc, 6, 7, jb) * z_d[7] + + lsq_qtmat_c_view(jc, 6, 8, jb) * z_d[8]; + z_qt_times_d[7] = lsq_qtmat_c_view(jc, 7, 0, jb) * z_d[0] + + lsq_qtmat_c_view(jc, 7, 1, jb) * z_d[1] + + lsq_qtmat_c_view(jc, 7, 2, jb) * z_d[2] + + lsq_qtmat_c_view(jc, 7, 3, jb) * z_d[3] + + lsq_qtmat_c_view(jc, 7, 4, jb) * z_d[4] + + lsq_qtmat_c_view(jc, 7, 5, jb) * z_d[5] + + lsq_qtmat_c_view(jc, 7, 6, jb) * z_d[6] + + lsq_qtmat_c_view(jc, 7, 7, jb) * z_d[7] + + lsq_qtmat_c_view(jc, 7, 8, jb) * z_d[8]; + z_qt_times_d[8] = lsq_qtmat_c_view(jc, 8, 0, jb) * z_d[0] + + lsq_qtmat_c_view(jc, 8, 1, jb) * z_d[1] + + lsq_qtmat_c_view(jc, 8, 2, jb) * z_d[2] + + lsq_qtmat_c_view(jc, 8, 3, jb) * z_d[3] + + lsq_qtmat_c_view(jc, 8, 4, jb) * z_d[4] + + lsq_qtmat_c_view(jc, 8, 5, jb) * z_d[5] + + lsq_qtmat_c_view(jc, 8, 6, jb) * z_d[6] + + lsq_qtmat_c_view(jc, 8, 7, jb) * z_d[7] + + lsq_qtmat_c_view(jc, 8, 8, jb) * z_d[8]; + + p_coeff_view(9, jc, jk, jb) = ptr_rrdiag(jc, 8, jb) * z_qt_times_d[8]; p_coeff_view(8, jc, jk, jb) = ptr_rrdiag(jc, 7, jb) * - (z_qt_times_d(7) - + (z_qt_times_d[7] - ptr_rutri(jc, 0, jb) * p_coeff_view(9, jc, jk, jb)); p_coeff_view(7, jc, jk, jb) = ptr_rrdiag(jc, 6, jb) * - (z_qt_times_d(6) - + (z_qt_times_d[6] - (ptr_rutri(jc, 1, jb) * p_coeff_view(8, jc, jk, jb) + ptr_rutri(jc, 2, jb) * p_coeff_view(9, jc, jk, jb))); p_coeff_view(6, jc, jk, jb) = ptr_rrdiag(jc, 5, jb) * - (z_qt_times_d(5) - + (z_qt_times_d[5] - (ptr_rutri(jc, 3, jb) * p_coeff_view(7, jc, jk, jb) + ptr_rutri(jc, 4, jb) * p_coeff_view(8, jc, jk, jb) + ptr_rutri(jc, 5, jb) * p_coeff_view(9, jc, jk, jb))); p_coeff_view(5, jc, jk, jb) = ptr_rrdiag(jc, 4, jb) * - (z_qt_times_d(4) - + (z_qt_times_d[4] - 
(ptr_rutri(jc, 6, jb) * p_coeff_view(6, jc, jk, jb) + ptr_rutri(jc, 7, jb) * p_coeff_view(7, jc, jk, jb) + ptr_rutri(jc, 8, jb) * p_coeff_view(8, jc, jk, jb) + ptr_rutri(jc, 9, jb) * p_coeff_view(9, jc, jk, jb))); p_coeff_view(4, jc, jk, jb) = ptr_rrdiag(jc, 3, jb) * - (z_qt_times_d(3) - + (z_qt_times_d[3] - (ptr_rutri(jc, 10, jb) * p_coeff_view(5, jc, jk, jb) + ptr_rutri(jc, 11, jb) * p_coeff_view(6, jc, jk, jb) + ptr_rutri(jc, 12, jb) * p_coeff_view(7, jc, jk, jb) + @@ -680,7 +685,7 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, ptr_rutri(jc, 14, jb) * p_coeff_view(9, jc, jk, jb))); p_coeff_view(3, jc, jk, jb) = ptr_rrdiag(jc, 2, jb) * - (z_qt_times_d(2) - + (z_qt_times_d[2] - (ptr_rutri(jc, 15, jb) * p_coeff_view(4, jc, jk, jb) + ptr_rutri(jc, 16, jb) * p_coeff_view(5, jc, jk, jb) + ptr_rutri(jc, 17, jb) * p_coeff_view(6, jc, jk, jb) + @@ -689,7 +694,7 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, ptr_rutri(jc, 20, jb) * p_coeff_view(9, jc, jk, jb))); p_coeff_view(2, jc, jk, jb) = ptr_rrdiag(jc, 1, jb) * - (z_qt_times_d(1) - + (z_qt_times_d[1] - (ptr_rutri(jc, 21, jb) * p_coeff_view(3, jc, jk, jb) + ptr_rutri(jc, 22, jb) * p_coeff_view(4, jc, jk, jb) + ptr_rutri(jc, 23, jb) * p_coeff_view(5, jc, jk, jb) + @@ -699,7 +704,7 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, ptr_rutri(jc, 27, jb) * p_coeff_view(9, jc, jk, jb))); p_coeff_view(1, jc, jk, jb) = ptr_rrdiag(jc, 0, jb) * - (z_qt_times_d(0) - + (z_qt_times_d[0] - (ptr_rutri(jc, 28, jb) * p_coeff_view(2, jc, jk, jb) + ptr_rutri(jc, 29, jb) * p_coeff_view(3, jc, jk, jb) + ptr_rutri(jc, 30, jb) * p_coeff_view(4, jc, jk, jb) + @@ -748,7 +753,7 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - Kokkos::View<T *> z_b("z_b", 9); + // Kokkos::View<T *> z_b("z_b", 9); UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); 
UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); @@ -786,115 +791,116 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, Kokkos::parallel_for( "recon_lsq_cell_c_svd_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - z_b(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + T z_b[9]; // Local array instead of shared View + z_b[0] = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - p_cc_view(jc, jk, jb); - z_b(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + z_b[1] = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - p_cc_view(jc, jk, jb); - z_b(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + z_b[2] = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - p_cc_view(jc, jk, jb); - z_b(3) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + z_b[3] = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - p_cc_view(jc, jk, jb); - z_b(4) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - + z_b[4] = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - p_cc_view(jc, jk, jb); - z_b(5) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - + z_b[5] = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - p_cc_view(jc, jk, jb); - z_b(6) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - + z_b[6] = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - p_cc_view(jc, jk, jb); - z_b(7) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - + z_b[7] = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - p_cc_view(jc, jk, jb); - z_b(8) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - + z_b[8] = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - p_cc_view(jc, jk, jb); p_coeff_view(9, jc, jk, jb) = - lsq_pseudoinv_view(jc, 8, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 8, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 8, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 8, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 8, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 8, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 8, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 8, 7, 
jb) * z_b(7) + - lsq_pseudoinv_view(jc, 8, 8, jb) * z_b(8); + lsq_pseudoinv_view(jc, 8, 0, jb) * z_b[0] + + lsq_pseudoinv_view(jc, 8, 1, jb) * z_b[1] + + lsq_pseudoinv_view(jc, 8, 2, jb) * z_b[2] + + lsq_pseudoinv_view(jc, 8, 3, jb) * z_b[3] + + lsq_pseudoinv_view(jc, 8, 4, jb) * z_b[4] + + lsq_pseudoinv_view(jc, 8, 5, jb) * z_b[5] + + lsq_pseudoinv_view(jc, 8, 6, jb) * z_b[6] + + lsq_pseudoinv_view(jc, 8, 7, jb) * z_b[7] + + lsq_pseudoinv_view(jc, 8, 8, jb) * z_b[8]; p_coeff_view(8, jc, jk, jb) = - lsq_pseudoinv_view(jc, 7, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 7, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 7, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 7, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 7, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 7, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 7, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 7, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 7, 8, jb) * z_b(8); + lsq_pseudoinv_view(jc, 7, 0, jb) * z_b[0] + + lsq_pseudoinv_view(jc, 7, 1, jb) * z_b[1] + + lsq_pseudoinv_view(jc, 7, 2, jb) * z_b[2] + + lsq_pseudoinv_view(jc, 7, 3, jb) * z_b[3] + + lsq_pseudoinv_view(jc, 7, 4, jb) * z_b[4] + + lsq_pseudoinv_view(jc, 7, 5, jb) * z_b[5] + + lsq_pseudoinv_view(jc, 7, 6, jb) * z_b[6] + + lsq_pseudoinv_view(jc, 7, 7, jb) * z_b[7] + + lsq_pseudoinv_view(jc, 7, 8, jb) * z_b[8]; p_coeff_view(7, jc, jk, jb) = - lsq_pseudoinv_view(jc, 6, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 6, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 6, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 6, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 6, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 6, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 6, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 6, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 6, 8, jb) * z_b(8); + lsq_pseudoinv_view(jc, 6, 0, jb) * z_b[0] + + lsq_pseudoinv_view(jc, 6, 1, jb) * z_b[1] + + lsq_pseudoinv_view(jc, 6, 2, jb) * z_b[2] + + lsq_pseudoinv_view(jc, 6, 3, jb) * z_b[3] + + lsq_pseudoinv_view(jc, 6, 4, jb) * z_b[4] 
+ + lsq_pseudoinv_view(jc, 6, 5, jb) * z_b[5] + + lsq_pseudoinv_view(jc, 6, 6, jb) * z_b[6] + + lsq_pseudoinv_view(jc, 6, 7, jb) * z_b[7] + + lsq_pseudoinv_view(jc, 6, 8, jb) * z_b[8]; p_coeff_view(6, jc, jk, jb) = - lsq_pseudoinv_view(jc, 5, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 5, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 5, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 5, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 5, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 5, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 5, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 5, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 5, 8, jb) * z_b(8); + lsq_pseudoinv_view(jc, 5, 0, jb) * z_b[0] + + lsq_pseudoinv_view(jc, 5, 1, jb) * z_b[1] + + lsq_pseudoinv_view(jc, 5, 2, jb) * z_b[2] + + lsq_pseudoinv_view(jc, 5, 3, jb) * z_b[3] + + lsq_pseudoinv_view(jc, 5, 4, jb) * z_b[4] + + lsq_pseudoinv_view(jc, 5, 5, jb) * z_b[5] + + lsq_pseudoinv_view(jc, 5, 6, jb) * z_b[6] + + lsq_pseudoinv_view(jc, 5, 7, jb) * z_b[7] + + lsq_pseudoinv_view(jc, 5, 8, jb) * z_b[8]; p_coeff_view(5, jc, jk, jb) = - lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8); + lsq_pseudoinv_view(jc, 4, 0, jb) * z_b[0] + + lsq_pseudoinv_view(jc, 4, 1, jb) * z_b[1] + + lsq_pseudoinv_view(jc, 4, 2, jb) * z_b[2] + + lsq_pseudoinv_view(jc, 4, 3, jb) * z_b[3] + + lsq_pseudoinv_view(jc, 4, 4, jb) * z_b[4] + + lsq_pseudoinv_view(jc, 4, 5, jb) * z_b[5] + + lsq_pseudoinv_view(jc, 4, 6, jb) * z_b[6] + + lsq_pseudoinv_view(jc, 4, 7, jb) * z_b[7] + + lsq_pseudoinv_view(jc, 4, 8, jb) * z_b[8]; p_coeff_view(4, jc, jk, jb) = - lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 
3, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8); + lsq_pseudoinv_view(jc, 3, 0, jb) * z_b[0] + + lsq_pseudoinv_view(jc, 3, 1, jb) * z_b[1] + + lsq_pseudoinv_view(jc, 3, 2, jb) * z_b[2] + + lsq_pseudoinv_view(jc, 3, 3, jb) * z_b[3] + + lsq_pseudoinv_view(jc, 3, 4, jb) * z_b[4] + + lsq_pseudoinv_view(jc, 3, 5, jb) * z_b[5] + + lsq_pseudoinv_view(jc, 3, 6, jb) * z_b[6] + + lsq_pseudoinv_view(jc, 3, 7, jb) * z_b[7] + + lsq_pseudoinv_view(jc, 3, 8, jb) * z_b[8]; p_coeff_view(3, jc, jk, jb) = - lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 2, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8); + lsq_pseudoinv_view(jc, 2, 0, jb) * z_b[0] + + lsq_pseudoinv_view(jc, 2, 1, jb) * z_b[1] + + lsq_pseudoinv_view(jc, 2, 2, jb) * z_b[2] + + lsq_pseudoinv_view(jc, 2, 3, jb) * z_b[3] + + lsq_pseudoinv_view(jc, 2, 4, jb) * z_b[4] + + lsq_pseudoinv_view(jc, 2, 5, jb) * z_b[5] + + lsq_pseudoinv_view(jc, 2, 6, jb) * z_b[6] + + lsq_pseudoinv_view(jc, 2, 7, jb) * z_b[7] + + lsq_pseudoinv_view(jc, 2, 8, jb) * z_b[8]; p_coeff_view(2, jc, jk, jb) = - lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 1, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 1, 7, jb) * 
z_b(7) + - lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8); + lsq_pseudoinv_view(jc, 1, 0, jb) * z_b[0] + + lsq_pseudoinv_view(jc, 1, 1, jb) * z_b[1] + + lsq_pseudoinv_view(jc, 1, 2, jb) * z_b[2] + + lsq_pseudoinv_view(jc, 1, 3, jb) * z_b[3] + + lsq_pseudoinv_view(jc, 1, 4, jb) * z_b[4] + + lsq_pseudoinv_view(jc, 1, 5, jb) * z_b[5] + + lsq_pseudoinv_view(jc, 1, 6, jb) * z_b[6] + + lsq_pseudoinv_view(jc, 1, 7, jb) * z_b[7] + + lsq_pseudoinv_view(jc, 1, 8, jb) * z_b[8]; p_coeff_view(1, jc, jk, jb) = - lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0) + - lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3) + - lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4) + - lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5) + - lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6) + - lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7) + - lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8); + lsq_pseudoinv_view(jc, 0, 0, jb) * z_b[0] + + lsq_pseudoinv_view(jc, 0, 1, jb) * z_b[1] + + lsq_pseudoinv_view(jc, 0, 2, jb) * z_b[2] + + lsq_pseudoinv_view(jc, 0, 3, jb) * z_b[3] + + lsq_pseudoinv_view(jc, 0, 4, jb) * z_b[4] + + lsq_pseudoinv_view(jc, 0, 5, jb) * z_b[5] + + lsq_pseudoinv_view(jc, 0, 6, jb) * z_b[6] + + lsq_pseudoinv_view(jc, 0, 7, jb) * z_b[7] + + lsq_pseudoinv_view(jc, 0, 8, jb) * z_b[8]; p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb) - p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index 0b42439..f4c5e27 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -33,8 +33,8 @@ endif() set(SOURCES main.cpp test_horizontal_div.cpp - # test_horizontal_recon.cpp # test_horizontal_rot.cpp + test_horizontal_recon.cpp # test_tdma_solver.cpp # test_interpolation_vector.cpp # test_intp_rbf.cpp diff --git a/test/c/test_horizontal_recon.cpp b/test/c/test_horizontal_recon.cpp index 8938a10..089c58f 100644 --- a/test/c/test_horizontal_recon.cpp +++ 
b/test/c/test_horizontal_recon.cpp @@ -14,8 +14,8 @@ #include <vector> #include <Kokkos_Core.hpp> -#include <gtest/gtest.h> #include <dim_helper.hpp> +#include <gtest/gtest.h> #include <horizontal/mo_lib_divrot.hpp> #include <support/mo_lib_loopindices.hpp> @@ -26,8 +26,8 @@ enum class ReconstructionMethod { cubic, }; -/// Base test class for the horizontal reconstruct tests. Templated for the ValueType -/// and ReconMethod for the reconstruction method. +/// Base test class for the horizontal reconstruct tests. Templated for the +/// ValueType and ReconMethod for the reconstruction method. template <typename ValueType, int ReconMethod> class HorizontalReconTest : public ::testing::Test { protected: @@ -66,28 +66,40 @@ protected: bool l_consv = true; // With conservative correction. bool l_limited_area = true; // Limited area setup - std::vector<ValueType> p_cc; - std::vector<int> cell_neighbor_idx; - std::vector<int> cell_neighbor_blk; - std::vector<ValueType> lsq_qtmat_c; - std::vector<ValueType> lsq_rmat_rdiag_c; - std::vector<ValueType> lsq_rmat_utri_c; - std::vector<ValueType> lsq_moments; - std::vector<ValueType> lsq_pseudoinv; - std::vector<ValueType> p_coeff; - - HorizontalReconTest() { - p_cc.resize(dim_combine(nproma, nlev, nblks_c)); - cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, lsq_dim_c)); - cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, lsq_dim_c)); - lsq_qtmat_c.resize(dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c)); - lsq_rmat_rdiag_c.resize(dim_combine(nproma, lsq_dim_unk, nblks_c)); - lsq_rmat_utri_c.resize(dim_combine( - nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c)); - lsq_moments.resize(dim_combine(nproma, nblks_c, lsq_dim_unk)); - lsq_pseudoinv.resize(dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c)); - p_coeff.resize(dim_combine(lsq_dim_unk + 1, nproma, nlev, nblks_c)); - } + // Using Kokkos execution and memory spaces + using exec_space = Kokkos::DefaultExecutionSpace; + using memory_space = 
exec_space::memory_space; + + // Kokkos Views for test data + Kokkos::View<ValueType *, memory_space> p_cc; + Kokkos::View<int *, memory_space> cell_neighbor_idx; + Kokkos::View<int *, memory_space> cell_neighbor_blk; + Kokkos::View<ValueType *, memory_space> lsq_qtmat_c; + Kokkos::View<ValueType *, memory_space> lsq_rmat_rdiag_c; + Kokkos::View<ValueType *, memory_space> lsq_rmat_utri_c; + Kokkos::View<ValueType *, memory_space> lsq_moments; + Kokkos::View<ValueType *, memory_space> lsq_pseudoinv; + Kokkos::View<ValueType *, memory_space> p_coeff; + + HorizontalReconTest() + : p_cc("p_cc", dim_combine(nproma, nlev, nblks_c)), + cell_neighbor_idx("cell_neighbor_idx", + dim_combine(nproma, nblks_c, lsq_dim_c)), + cell_neighbor_blk("cell_neighbor_blk", + dim_combine(nproma, nblks_c, lsq_dim_c)), + lsq_qtmat_c("lsq_qtmat_c", + dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c)), + lsq_rmat_rdiag_c("lsq_rmat_rdiag_c", + dim_combine(nproma, lsq_dim_unk, nblks_c)), + lsq_rmat_utri_c( + "lsq_rmat_utri_c", + dim_combine(nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, + nblks_c)), + lsq_moments("lsq_moments", dim_combine(nproma, nblks_c, lsq_dim_unk)), + lsq_pseudoinv("lsq_pseudoinv", + dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c)), + p_coeff("p_coeff", + dim_combine(lsq_dim_unk + 1, nproma, nlev, nblks_c)) {} }; /// Test class for the horizontal tests. 
The reconstruction method is specified @@ -134,30 +146,54 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCell) { at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + // Create host mirror views + auto p_cc_h = Kokkos::create_mirror_view(this->p_cc); + auto cell_neighbor_idx_h = + Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto cell_neighbor_blk_h = + Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto lsq_qtmat_c_h = Kokkos::create_mirror_view(this->lsq_qtmat_c); + auto lsq_rmat_rdiag_c_h = Kokkos::create_mirror_view(this->lsq_rmat_rdiag_c); + auto lsq_rmat_utri_c_h = Kokkos::create_mirror_view(this->lsq_rmat_utri_c); + auto lsq_moments_h = Kokkos::create_mirror_view(this->lsq_moments); + auto p_coeff_h = Kokkos::create_mirror_view(this->p_coeff); + // Initialization for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + p_cc_h[p_cc_at(i, 0, 0)] = static_cast<TypeParam>(i + 1); + + cell_neighbor_idx_h[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, 1)] = i; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, 2)] = i; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; for (int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; - this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.5; + cell_neighbor_blk_h[cell_neighbor_at(i, 0, j)] = 0; + lsq_qtmat_c_h[qtmat_at(i, 0, j, 0)] = static_cast<TypeParam>(1.0); + lsq_qtmat_c_h[qtmat_at(i, 1, j, 0)] = static_cast<TypeParam>(0.5); } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + p_coeff_h[p_coeff_at(j, i, 0, 0)] = static_cast<TypeParam>(0.0); } - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = 2.0; - 
this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = 2.0; - this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = 0.1; + lsq_rmat_rdiag_c_h[rmat_rdiag_at(i, 0, 0)] = static_cast<TypeParam>(2.0); + lsq_rmat_rdiag_c_h[rmat_rdiag_at(i, 1, 0)] = static_cast<TypeParam>(2.0); + lsq_rmat_utri_c_h[rmat_utri_at(i, 0, 0)] = static_cast<TypeParam>(0.1); - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + lsq_moments_h[moments_at(i, 0, 0)] = static_cast<TypeParam>(0.2); + lsq_moments_h[moments_at(i, 0, 1)] = static_cast<TypeParam>(0.3); } + // Copy to device + Kokkos::deep_copy(this->p_cc, p_cc_h); + Kokkos::deep_copy(this->cell_neighbor_idx, cell_neighbor_idx_h); + Kokkos::deep_copy(this->cell_neighbor_blk, cell_neighbor_blk_h); + Kokkos::deep_copy(this->lsq_qtmat_c, lsq_qtmat_c_h); + Kokkos::deep_copy(this->lsq_rmat_rdiag_c, lsq_rmat_rdiag_c_h); + Kokkos::deep_copy(this->lsq_rmat_utri_c, lsq_rmat_utri_c_h); + Kokkos::deep_copy(this->lsq_moments, lsq_moments_h); + Kokkos::deep_copy(this->p_coeff, p_coeff_h); + // Test function recon_lsq_cell_l<TypeParam>( this->p_cc.data(), this->cell_neighbor_idx.data(), @@ -168,16 +204,19 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCell) { this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + // Copy results back to host + Kokkos::deep_copy(p_coeff_h, this->p_coeff); + // Check result EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - 0.34, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + static_cast<TypeParam>(0.34), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 1.8, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + static_cast<TypeParam>(1.8), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - 1.0, 1e-6); + 
p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + static_cast<TypeParam>(1.0), 1e-6); } TYPED_TEST(HorizontalReconLinearTest, TestLsqCellRandom) { @@ -196,6 +235,18 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellRandom) { at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + // Create host mirror views + auto p_cc_h = Kokkos::create_mirror_view(this->p_cc); + auto cell_neighbor_idx_h = + Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto cell_neighbor_blk_h = + Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto lsq_qtmat_c_h = Kokkos::create_mirror_view(this->lsq_qtmat_c); + auto lsq_rmat_rdiag_c_h = Kokkos::create_mirror_view(this->lsq_rmat_rdiag_c); + auto lsq_rmat_utri_c_h = Kokkos::create_mirror_view(this->lsq_rmat_utri_c); + auto lsq_moments_h = Kokkos::create_mirror_view(this->lsq_moments); + auto p_coeff_h = Kokkos::create_mirror_view(this->p_coeff); + std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution<int> int_distrib(0, nproma - 1); @@ -203,26 +254,37 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellRandom) { // Initialization for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + p_cc_h[p_cc_at(i, 0, 0)] = real_distrib(gen); for (int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = real_distrib(gen); - this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = real_distrib(gen); + cell_neighbor_idx_h[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + cell_neighbor_blk_h[cell_neighbor_at(i, 0, j)] = 0; + lsq_qtmat_c_h[qtmat_at(i, 0, j, 0)] = real_distrib(gen); + lsq_qtmat_c_h[qtmat_at(i, 1, j, 0)] = real_distrib(gen); } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + 
p_coeff_h[p_coeff_at(j, i, 0, 0)] = static_cast<TypeParam>(0.0); } - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = real_distrib(gen); - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = real_distrib(gen); - this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = real_distrib(gen); + lsq_rmat_rdiag_c_h[rmat_rdiag_at(i, 0, 0)] = real_distrib(gen); + lsq_rmat_rdiag_c_h[rmat_rdiag_at(i, 1, 0)] = real_distrib(gen); + lsq_rmat_utri_c_h[rmat_utri_at(i, 0, 0)] = real_distrib(gen); - this->lsq_moments[moments_at(i, 0, 0)] = real_distrib(gen); - this->lsq_moments[moments_at(i, 0, 1)] = real_distrib(gen); + lsq_moments_h[moments_at(i, 0, 0)] = real_distrib(gen); + lsq_moments_h[moments_at(i, 0, 1)] = real_distrib(gen); } + // Copy to device + Kokkos::deep_copy(this->p_cc, p_cc_h); + Kokkos::deep_copy(this->cell_neighbor_idx, cell_neighbor_idx_h); + Kokkos::deep_copy(this->cell_neighbor_blk, cell_neighbor_blk_h); + Kokkos::deep_copy(this->lsq_qtmat_c, lsq_qtmat_c_h); + Kokkos::deep_copy(this->lsq_rmat_rdiag_c, lsq_rmat_rdiag_c_h); + Kokkos::deep_copy(this->lsq_rmat_utri_c, lsq_rmat_utri_c_h); + Kokkos::deep_copy(this->lsq_moments, lsq_moments_h); + Kokkos::deep_copy(this->p_coeff, p_coeff_h); + // Test function recon_lsq_cell_l<TypeParam>( this->p_cc.data(), this->cell_neighbor_idx.data(), @@ -233,10 +295,14 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellRandom) { this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + // Copy results back to host + Kokkos::deep_copy(p_coeff_h, this->p_coeff); + // Compute reference result std::vector<TypeParam> z_d(lsq_dim_c); std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, @@ -244,30 +310,30 @@ TYPED_TEST(HorizontalReconLinearTest, 
TestLsqCellRandom) { for (int jk = this->slev; jk < this->elev; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { for (int i = 0; i < lsq_dim_c; ++i) { - z_d[i] = this->p_cc[p_cc_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; + z_d[i] = p_cc_h[p_cc_at( + cell_neighbor_idx_h[cell_neighbor_at(jc, jb, i)], jk, + cell_neighbor_blk_h[cell_neighbor_at(jc, jb, i)])] - + p_cc_h[p_cc_at(jc, jk, jb)]; } z_qt_times_d[0] = 0.0; z_qt_times_d[1] = 0.0; for (int i = 0; i < lsq_dim_c; ++i) { - z_qt_times_d[0] += this->lsq_qtmat_c[qtmat_at(jc, 0, i, jb)] * z_d[i]; - z_qt_times_d[1] += this->lsq_qtmat_c[qtmat_at(jc, 1, i, jb)] * z_d[i]; + z_qt_times_d[0] += lsq_qtmat_c_h[qtmat_at(jc, 0, i, jb)] * z_d[i]; + z_qt_times_d[1] += lsq_qtmat_c_h[qtmat_at(jc, 1, i, jb)] * z_d[i]; } p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] = - this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 1, jb)] * z_qt_times_d[1]; + lsq_rmat_rdiag_c_h[rmat_rdiag_at(jc, 1, jb)] * z_qt_times_d[1]; p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] = - this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 0, jb)] * + lsq_rmat_rdiag_c_h[rmat_rdiag_at(jc, 0, jb)] * (z_qt_times_d[0] - - this->lsq_rmat_utri_c[rmat_utri_at(jc, 0, jb)] * + lsq_rmat_utri_c_h[rmat_utri_at(jc, 0, jb)] * p_result[at<lsq_dim_unk + 1, nproma>(2, jc)]); p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)] - + p_cc_h[p_cc_at(jc, jk, jb)] - p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] * - this->lsq_moments[moments_at(jc, jb, 0)] - + lsq_moments_h[moments_at(jc, jb, 0)] - p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] * - this->lsq_moments[moments_at(jc, jb, 1)]; + lsq_moments_h[moments_at(jc, jb, 1)]; } } } @@ -275,7 +341,7 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellRandom) { // Check result for (int i = 0; i < lsq_dim_unk + 1; ++i) { for (int jc = 0; jc < nproma; ++jc) { - EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], 
+ EXPECT_NEAR(p_coeff_h[(p_coeff_at(i, jc, 0, 0))], p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) << "For loop result fails for i = " << i << ", jc = " << jc; } @@ -295,26 +361,46 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVD) { const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + // Create host mirror views + auto p_cc_h = Kokkos::create_mirror_view(this->p_cc); + auto cell_neighbor_idx_h = + Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto cell_neighbor_blk_h = + Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto lsq_pseudoinv_h = Kokkos::create_mirror_view(this->lsq_pseudoinv); + auto lsq_moments_h = Kokkos::create_mirror_view(this->lsq_moments); + auto p_coeff_h = Kokkos::create_mirror_view(this->p_coeff); + // Initialization for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + p_cc_h[p_cc_at(i, 0, 0)] = static_cast<TypeParam>(i + 1); + + cell_neighbor_idx_h[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, 1)] = i; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, 2)] = i; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; for (int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; - this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; + cell_neighbor_blk_h[cell_neighbor_at(i, 0, j)] = 0; + lsq_pseudoinv_h[pseudoinv_at(i, 0, j, 0)] = static_cast<TypeParam>(1.0); + lsq_pseudoinv_h[pseudoinv_at(i, 1, j, 0)] = static_cast<TypeParam>(0.5); } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + p_coeff_h[p_coeff_at(j, i, 0, 0)] = static_cast<TypeParam>(0.0); } - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - 
this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + lsq_moments_h[moments_at(i, 0, 0)] = static_cast<TypeParam>(0.2); + lsq_moments_h[moments_at(i, 0, 1)] = static_cast<TypeParam>(0.3); } + // Copy to device + Kokkos::deep_copy(this->p_cc, p_cc_h); + Kokkos::deep_copy(this->cell_neighbor_idx, cell_neighbor_idx_h); + Kokkos::deep_copy(this->cell_neighbor_blk, cell_neighbor_blk_h); + Kokkos::deep_copy(this->lsq_pseudoinv, lsq_pseudoinv_h); + Kokkos::deep_copy(this->lsq_moments, lsq_moments_h); + Kokkos::deep_copy(this->p_coeff, p_coeff_h); + // Test function recon_lsq_cell_l_svd<TypeParam>( this->p_cc.data(), this->cell_neighbor_idx.data(), @@ -324,16 +410,19 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVD) { this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + // Copy results back to host + Kokkos::deep_copy(p_coeff_h, this->p_coeff); + // Check result EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - 0.65, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + static_cast<TypeParam>(0.65), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 1.0, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + static_cast<TypeParam>(1.0), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - 0.5, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + static_cast<TypeParam>(0.5), 1e-6); } TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVDRandom) { @@ -349,6 +438,16 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVDRandom) { const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + // Create host mirror views + auto p_cc_h = Kokkos::create_mirror_view(this->p_cc); + auto cell_neighbor_idx_h = + 
Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto cell_neighbor_blk_h = + Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto lsq_pseudoinv_h = Kokkos::create_mirror_view(this->lsq_pseudoinv); + auto lsq_moments_h = Kokkos::create_mirror_view(this->lsq_moments); + auto p_coeff_h = Kokkos::create_mirror_view(this->p_coeff); + std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution<int> int_distrib(0, nproma - 1); @@ -356,22 +455,31 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVDRandom) { // Initialization for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + p_cc_h[p_cc_at(i, 0, 0)] = real_distrib(gen); for (int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = real_distrib(gen); - this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = real_distrib(gen); + cell_neighbor_idx_h[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + cell_neighbor_blk_h[cell_neighbor_at(i, 0, j)] = 0; + lsq_pseudoinv_h[pseudoinv_at(i, 0, j, 0)] = real_distrib(gen); + lsq_pseudoinv_h[pseudoinv_at(i, 1, j, 0)] = real_distrib(gen); } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + p_coeff_h[p_coeff_at(j, i, 0, 0)] = static_cast<TypeParam>(0.0); } - this->lsq_moments[moments_at(i, 0, 0)] = real_distrib(gen); - this->lsq_moments[moments_at(i, 0, 1)] = real_distrib(gen); + lsq_moments_h[moments_at(i, 0, 0)] = real_distrib(gen); + lsq_moments_h[moments_at(i, 0, 1)] = real_distrib(gen); } + // Copy to device + Kokkos::deep_copy(this->p_cc, p_cc_h); + Kokkos::deep_copy(this->cell_neighbor_idx, cell_neighbor_idx_h); + Kokkos::deep_copy(this->cell_neighbor_blk, cell_neighbor_blk_h); + Kokkos::deep_copy(this->lsq_pseudoinv, lsq_pseudoinv_h); + Kokkos::deep_copy(this->lsq_moments, lsq_moments_h); + 
Kokkos::deep_copy(this->p_coeff, p_coeff_h); + // Test function recon_lsq_cell_l_svd<TypeParam>( this->p_cc.data(), this->cell_neighbor_idx.data(), @@ -381,6 +489,9 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVDRandom) { this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + // Copy results back to host + Kokkos::deep_copy(p_coeff_h, this->p_coeff); + // Compute reference result std::vector<TypeParam> z_d(lsq_dim_c); std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); @@ -391,26 +502,26 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVDRandom) { for (int jk = this->slev; jk < this->elev; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { for (int i = 0; i < lsq_dim_c; ++i) { - z_d[i] = this->p_cc[p_cc_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; + z_d[i] = p_cc_h[p_cc_at( + cell_neighbor_idx_h[cell_neighbor_at(jc, jb, i)], jk, + cell_neighbor_blk_h[cell_neighbor_at(jc, jb, i)])] - + p_cc_h[p_cc_at(jc, jk, jb)]; } p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] = - this->lsq_pseudoinv[pseudoinv_at(jc, 1, 0, jb)] * z_d[0] + - this->lsq_pseudoinv[pseudoinv_at(jc, 1, 1, jb)] * z_d[1] + - this->lsq_pseudoinv[pseudoinv_at(jc, 1, 2, jb)] * z_d[2]; + lsq_pseudoinv_h[pseudoinv_at(jc, 1, 0, jb)] * z_d[0] + + lsq_pseudoinv_h[pseudoinv_at(jc, 1, 1, jb)] * z_d[1] + + lsq_pseudoinv_h[pseudoinv_at(jc, 1, 2, jb)] * z_d[2]; p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] = - this->lsq_pseudoinv[pseudoinv_at(jc, 0, 0, jb)] * z_d[0] + - this->lsq_pseudoinv[pseudoinv_at(jc, 0, 1, jb)] * z_d[1] + - this->lsq_pseudoinv[pseudoinv_at(jc, 0, 2, jb)] * z_d[2]; + lsq_pseudoinv_h[pseudoinv_at(jc, 0, 0, jb)] * z_d[0] + + lsq_pseudoinv_h[pseudoinv_at(jc, 0, 1, jb)] * z_d[1] + + lsq_pseudoinv_h[pseudoinv_at(jc, 0, 2, jb)] * z_d[2]; p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = 
p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)] - + p_cc_h[p_cc_at(jc, jk, jb)] - p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] * - this->lsq_moments[moments_at(jc, jb, 0)] - + lsq_moments_h[moments_at(jc, jb, 0)] - p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] * - this->lsq_moments[moments_at(jc, jb, 1)]; + lsq_moments_h[moments_at(jc, jb, 1)]; } } } @@ -418,7 +529,7 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVDRandom) { // Check result for (int i = 0; i < lsq_dim_unk + 1; ++i) { for (int jc = 0; jc < nproma; ++jc) { - EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + EXPECT_NEAR(p_coeff_h[(p_coeff_at(i, jc, 0, 0))], p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) << "For loop result fails for i = " << i << ", jc = " << jc; } @@ -443,43 +554,65 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCell) { at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + // Create host mirror views + auto p_cc_h = Kokkos::create_mirror_view(this->p_cc); + auto cell_neighbor_idx_h = + Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto cell_neighbor_blk_h = + Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto lsq_qtmat_c_h = Kokkos::create_mirror_view(this->lsq_qtmat_c); + auto lsq_rmat_rdiag_c_h = Kokkos::create_mirror_view(this->lsq_rmat_rdiag_c); + auto lsq_rmat_utri_c_h = Kokkos::create_mirror_view(this->lsq_rmat_utri_c); + auto lsq_moments_h = Kokkos::create_mirror_view(this->lsq_moments); + auto p_coeff_h = Kokkos::create_mirror_view(this->p_coeff); + // Initialization for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + p_cc_h[p_cc_at(i, 0, 0)] = static_cast<TypeParam>(i + 1); - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + 
cell_neighbor_blk_h[cell_neighbor_at(i, 0, 0)] = 0; for (int j = 1; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, j)] = i; + cell_neighbor_blk_h[cell_neighbor_at(i, 0, j)] = 0; } for (int j = 0; j < lsq_dim_c; ++j) { - this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; - this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.5; - this->lsq_qtmat_c[qtmat_at(i, 2, j, 0)] = 0.2; - this->lsq_qtmat_c[qtmat_at(i, 3, j, 0)] = 0.7; - this->lsq_qtmat_c[qtmat_at(i, 4, j, 0)] = 1.3; + lsq_qtmat_c_h[qtmat_at(i, 0, j, 0)] = static_cast<TypeParam>(1.0); + lsq_qtmat_c_h[qtmat_at(i, 1, j, 0)] = static_cast<TypeParam>(0.5); + lsq_qtmat_c_h[qtmat_at(i, 2, j, 0)] = static_cast<TypeParam>(0.2); + lsq_qtmat_c_h[qtmat_at(i, 3, j, 0)] = static_cast<TypeParam>(0.7); + lsq_qtmat_c_h[qtmat_at(i, 4, j, 0)] = static_cast<TypeParam>(1.3); } for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + p_coeff_h[p_coeff_at(j, i, 0, 0)] = static_cast<TypeParam>(0.0); } for (int j = 0; j < lsq_dim_unk; ++j) { - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = 2.0; + lsq_rmat_rdiag_c_h[rmat_rdiag_at(i, j, 0)] = static_cast<TypeParam>(2.0); } for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { - this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = 1.0; + lsq_rmat_utri_c_h[rmat_utri_at(i, j, 0)] = static_cast<TypeParam>(1.0); } - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - this->lsq_moments[moments_at(i, 0, 2)] = 0.4; - this->lsq_moments[moments_at(i, 0, 3)] = 0.5; - this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + lsq_moments_h[moments_at(i, 0, 0)] = static_cast<TypeParam>(0.2); + lsq_moments_h[moments_at(i, 0, 1)] = static_cast<TypeParam>(0.3); + lsq_moments_h[moments_at(i, 0, 2)] = static_cast<TypeParam>(0.4); + lsq_moments_h[moments_at(i, 0, 3)] = static_cast<TypeParam>(0.5); + 
lsq_moments_h[moments_at(i, 0, 4)] = static_cast<TypeParam>(0.6); } + // Copy to device + Kokkos::deep_copy(this->p_cc, p_cc_h); + Kokkos::deep_copy(this->cell_neighbor_idx, cell_neighbor_idx_h); + Kokkos::deep_copy(this->cell_neighbor_blk, cell_neighbor_blk_h); + Kokkos::deep_copy(this->lsq_qtmat_c, lsq_qtmat_c_h); + Kokkos::deep_copy(this->lsq_rmat_rdiag_c, lsq_rmat_rdiag_c_h); + Kokkos::deep_copy(this->lsq_rmat_utri_c, lsq_rmat_utri_c_h); + Kokkos::deep_copy(this->lsq_moments, lsq_moments_h); + Kokkos::deep_copy(this->p_coeff, p_coeff_h); + // Test function recon_lsq_cell_q<TypeParam>( this->p_cc.data(), this->cell_neighbor_idx.data(), @@ -491,25 +624,28 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCell) { this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + // Copy results back to host + Kokkos::deep_copy(p_coeff_h, this->p_coeff); + // Check result EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - 0.24, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + static_cast<TypeParam>(0.24), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 3.2, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + static_cast<TypeParam>(3.2), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - -2.2, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + static_cast<TypeParam>(-2.2), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], - 2.8, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + static_cast<TypeParam>(2.8), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], - -3.8, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + static_cast<TypeParam>(-3.8), 1e-6); EXPECT_NEAR( - 
this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], - 2.6, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + static_cast<TypeParam>(2.6), 1e-6); } TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { @@ -528,6 +664,19 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + // Create host mirror views + auto p_cc_h = Kokkos::create_mirror_view(this->p_cc); + auto cell_neighbor_idx_h = + Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto cell_neighbor_blk_h = + Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto lsq_qtmat_c_h = Kokkos::create_mirror_view(this->lsq_qtmat_c); + auto lsq_rmat_rdiag_c_h = Kokkos::create_mirror_view(this->lsq_rmat_rdiag_c); + auto lsq_rmat_utri_c_h = Kokkos::create_mirror_view(this->lsq_rmat_utri_c); + auto lsq_moments_h = Kokkos::create_mirror_view(this->lsq_moments); + auto p_coeff_h = Kokkos::create_mirror_view(this->p_coeff); + + // Use fixed seed for reproducibility std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution<int> int_distrib(0, nproma - 1); @@ -535,29 +684,39 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { // Initialization for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + p_cc_h[p_cc_at(i, 0, 0)] = real_distrib(gen); for (int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + cell_neighbor_blk_h[cell_neighbor_at(i, 0, j)] = 0; } for (int j = 0; j < lsq_dim_unk; ++j) { for (int k = 0; k < lsq_dim_c; ++k) { - this->lsq_qtmat_c[qtmat_at(i, j, k, 0)] = real_distrib(gen); + lsq_qtmat_c_h[qtmat_at(i, j, k, 0)] = real_distrib(gen); } - 
this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = real_distrib(gen); - this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + lsq_rmat_rdiag_c_h[rmat_rdiag_at(i, j, 0)] = real_distrib(gen); + lsq_moments_h[moments_at(i, 0, j)] = real_distrib(gen); } for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { - this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = real_distrib(gen); + lsq_rmat_utri_c_h[rmat_utri_at(i, j, 0)] = real_distrib(gen); } for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + p_coeff_h[p_coeff_at(j, i, 0, 0)] = static_cast<TypeParam>(0.0); } } + // Copy to device + Kokkos::deep_copy(this->p_cc, p_cc_h); + Kokkos::deep_copy(this->cell_neighbor_idx, cell_neighbor_idx_h); + Kokkos::deep_copy(this->cell_neighbor_blk, cell_neighbor_blk_h); + Kokkos::deep_copy(this->lsq_qtmat_c, lsq_qtmat_c_h); + Kokkos::deep_copy(this->lsq_rmat_rdiag_c, lsq_rmat_rdiag_c_h); + Kokkos::deep_copy(this->lsq_rmat_utri_c, lsq_rmat_utri_c_h); + Kokkos::deep_copy(this->lsq_moments, lsq_moments_h); + Kokkos::deep_copy(this->p_coeff, p_coeff_h); + // Test function recon_lsq_cell_q<TypeParam>( this->p_cc.data(), this->cell_neighbor_idx.data(), @@ -569,46 +728,89 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); - // Compute reference result - std::vector<TypeParam> z_d(lsq_dim_c); - std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); - std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + // Copy results back to host + Kokkos::deep_copy(p_coeff_h, this->p_coeff); + + // Create host views for reference computation + using host_space = Kokkos::HostSpace; + + // Arrays for intermediate calculations + Kokkos::View<TypeParam ***, host_space> z_d_h("z_d_h", lsq_dim_c, nproma, + nlev); + Kokkos::View<TypeParam *, host_space> z_qt_times_d_h("z_qt_times_d_h", + lsq_dim_unk); + + // Result view + Kokkos::View<TypeParam **, 
host_space> p_result_h("p_result_h", + lsq_dim_unk + 1, nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + // Step 1: Calculate z_d values (matches the "recon_lsq_cell_q_step1" + // parallel_for) for (int jk = this->slev; jk < this->elev; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { for (int i = 0; i < lsq_dim_c; ++i) { - z_d[i] = this->p_cc[p_cc_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; + z_d_h(i, jc, jk) = + p_cc_h[p_cc_at( + cell_neighbor_idx_h[cell_neighbor_at(jc, jb, i)], jk, + cell_neighbor_blk_h[cell_neighbor_at(jc, jb, i)])] - + p_cc_h[p_cc_at(jc, jk, jb)]; } + } + } + + // Step 2: Calculate coefficients (matches the "recon_lsq_cell_q_step2" + // parallel_for) + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + // Matrix multiplication (Q^T * d) for (int j = 0; j < lsq_dim_unk; ++j) { - z_qt_times_d[j] = 0.0; + z_qt_times_d_h(j) = 0.0; for (int i = 0; i < lsq_dim_c; ++i) { - z_qt_times_d[j] += - this->lsq_qtmat_c[qtmat_at(jc, j, i, jb)] * z_d[i]; + z_qt_times_d_h(j) += + lsq_qtmat_c_h[qtmat_at(jc, j, i, jb)] * z_d_h(i, jc, jk); } } - int utri_id = 0; - for (int j = lsq_dim_unk; j > 0; --j) { - p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = z_qt_times_d[j - 1]; - for (int k = j + 1; k <= lsq_dim_unk; ++k) { - p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] -= - this->lsq_rmat_utri_c[rmat_utri_at(jc, utri_id++, jb)] * - p_result[at<lsq_dim_unk + 1, nproma>(k, jc)]; - } - p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] *= - this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, j - 1, jb)]; - } - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)]; + + // Back-substitution (mirrors the order in the 
GPU implementation) + p_result_h(5, jc) = + lsq_rmat_rdiag_c_h[rmat_rdiag_at(jc, 4, jb)] * z_qt_times_d_h(4); + + p_result_h(4, jc) = + lsq_rmat_rdiag_c_h[rmat_rdiag_at(jc, 3, jb)] * + (z_qt_times_d_h(3) - + lsq_rmat_utri_c_h[rmat_utri_at(jc, 0, jb)] * p_result_h(5, jc)); + + p_result_h(3, jc) = + lsq_rmat_rdiag_c_h[rmat_rdiag_at(jc, 2, jb)] * + (z_qt_times_d_h(2) - + lsq_rmat_utri_c_h[rmat_utri_at(jc, 1, jb)] * p_result_h(4, jc) - + lsq_rmat_utri_c_h[rmat_utri_at(jc, 2, jb)] * p_result_h(5, jc)); + + p_result_h(2, jc) = + lsq_rmat_rdiag_c_h[rmat_rdiag_at(jc, 1, jb)] * + (z_qt_times_d_h(1) - + lsq_rmat_utri_c_h[rmat_utri_at(jc, 3, jb)] * p_result_h(3, jc) - + lsq_rmat_utri_c_h[rmat_utri_at(jc, 4, jb)] * p_result_h(4, jc) - + lsq_rmat_utri_c_h[rmat_utri_at(jc, 5, jb)] * p_result_h(5, jc)); + + p_result_h(1, jc) = + lsq_rmat_rdiag_c_h[rmat_rdiag_at(jc, 0, jb)] * + (z_qt_times_d_h(0) - + lsq_rmat_utri_c_h[rmat_utri_at(jc, 6, jb)] * p_result_h(2, jc) - + lsq_rmat_utri_c_h[rmat_utri_at(jc, 7, jb)] * p_result_h(3, jc) - + lsq_rmat_utri_c_h[rmat_utri_at(jc, 8, jb)] * p_result_h(4, jc) - + lsq_rmat_utri_c_h[rmat_utri_at(jc, 9, jb)] * p_result_h(5, jc)); + + // Conservation correction + p_result_h(0, jc) = p_cc_h[p_cc_at(jc, jk, jb)]; for (int j = 0; j < lsq_dim_unk; ++j) { - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= - p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * - this->lsq_moments[moments_at(jc, jb, j)]; + p_result_h(0, jc) -= + p_result_h(j + 1, jc) * lsq_moments_h[moments_at(jc, jb, j)]; } } } @@ -617,8 +819,7 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { // Check result for (int i = 0; i < lsq_dim_unk + 1; ++i) { for (int jc = 0; jc < nproma; ++jc) { - EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], - p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + EXPECT_NEAR(p_coeff_h[p_coeff_at(i, jc, 0, 0)], p_result_h(i, jc), 1e-5) << "For loop result fails for i = " << i << ", jc = " << jc; } } @@ -637,35 +838,53 @@ 
TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVD) { const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + // Create host mirror views + auto p_cc_h = Kokkos::create_mirror_view(this->p_cc); + auto cell_neighbor_idx_h = + Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto cell_neighbor_blk_h = + Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto lsq_pseudoinv_h = Kokkos::create_mirror_view(this->lsq_pseudoinv); + auto lsq_moments_h = Kokkos::create_mirror_view(this->lsq_moments); + auto p_coeff_h = Kokkos::create_mirror_view(this->p_coeff); + // Initialization for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + p_cc_h[p_cc_at(i, 0, 0)] = static_cast<TypeParam>(i + 1); - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + cell_neighbor_blk_h[cell_neighbor_at(i, 0, 0)] = 0; for (int j = 1; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, j)] = i; + cell_neighbor_blk_h[cell_neighbor_at(i, 0, j)] = 0; } for (int j = 0; j < lsq_dim_c; ++j) { - this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; - this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; - this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.2; - this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; - this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 1.3; + lsq_pseudoinv_h[pseudoinv_at(i, 0, j, 0)] = static_cast<TypeParam>(1.0); + lsq_pseudoinv_h[pseudoinv_at(i, 1, j, 0)] = static_cast<TypeParam>(0.5); + lsq_pseudoinv_h[pseudoinv_at(i, 2, j, 0)] = static_cast<TypeParam>(0.2); + lsq_pseudoinv_h[pseudoinv_at(i, 3, j, 0)] = static_cast<TypeParam>(0.7); + lsq_pseudoinv_h[pseudoinv_at(i, 4, j, 0)] = 
static_cast<TypeParam>(1.3); } for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + p_coeff_h[p_coeff_at(j, i, 0, 0)] = static_cast<TypeParam>(0.0); } - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - this->lsq_moments[moments_at(i, 0, 2)] = 0.4; - this->lsq_moments[moments_at(i, 0, 3)] = 0.5; - this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + lsq_moments_h[moments_at(i, 0, 0)] = static_cast<TypeParam>(0.2); + lsq_moments_h[moments_at(i, 0, 1)] = static_cast<TypeParam>(0.3); + lsq_moments_h[moments_at(i, 0, 2)] = static_cast<TypeParam>(0.4); + lsq_moments_h[moments_at(i, 0, 3)] = static_cast<TypeParam>(0.5); + lsq_moments_h[moments_at(i, 0, 4)] = static_cast<TypeParam>(0.6); } + // Copy to device + Kokkos::deep_copy(this->p_cc, p_cc_h); + Kokkos::deep_copy(this->cell_neighbor_idx, cell_neighbor_idx_h); + Kokkos::deep_copy(this->cell_neighbor_blk, cell_neighbor_blk_h); + Kokkos::deep_copy(this->lsq_pseudoinv, lsq_pseudoinv_h); + Kokkos::deep_copy(this->lsq_moments, lsq_moments_h); + Kokkos::deep_copy(this->p_coeff, p_coeff_h); + // Test function recon_lsq_cell_q_svd<TypeParam>( this->p_cc.data(), this->cell_neighbor_idx.data(), @@ -676,25 +895,28 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVD) { this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + // Copy results back to host + Kokkos::deep_copy(p_coeff_h, this->p_coeff); + // Check result EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - -0.56, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + static_cast<TypeParam>(-0.56), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 1.0, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + static_cast<TypeParam>(1.0), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], 
- 0.5, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + static_cast<TypeParam>(0.5), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], - 0.2, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + static_cast<TypeParam>(0.2), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], - 0.7, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + static_cast<TypeParam>(0.7), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], - 1.3, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + static_cast<TypeParam>(1.3), 1e-6); } TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { @@ -713,32 +935,51 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + // Create host mirror views + auto p_cc_h = Kokkos::create_mirror_view(this->p_cc); + auto cell_neighbor_idx_h = + Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto cell_neighbor_blk_h = + Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto lsq_pseudoinv_h = Kokkos::create_mirror_view(this->lsq_pseudoinv); + auto lsq_moments_h = Kokkos::create_mirror_view(this->lsq_moments); + auto p_coeff_h = Kokkos::create_mirror_view(this->p_coeff); + + // Use fixed seed for reproducibility std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution<int> int_distrib(0, nproma - 1); std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); - // Initialization is done only for iblk = 0 and ilev = 0 + // Initialization for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + p_cc_h[p_cc_at(i, 0, 0)] = real_distrib(gen); for (int j = 0; j < lsq_dim_c; ++j) { - 
this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + cell_neighbor_blk_h[cell_neighbor_at(i, 0, j)] = 0; } for (int j = 0; j < lsq_dim_unk; ++j) { for (int k = 0; k < lsq_dim_c; ++k) { - this->lsq_pseudoinv[pseudoinv_at(i, j, k, 0)] = real_distrib(gen); + lsq_pseudoinv_h[pseudoinv_at(i, j, k, 0)] = real_distrib(gen); } - this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + lsq_moments_h[moments_at(i, 0, j)] = real_distrib(gen); } for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + p_coeff_h[p_coeff_at(j, i, 0, 0)] = static_cast<TypeParam>(0.0); } } + // Copy to device + Kokkos::deep_copy(this->p_cc, p_cc_h); + Kokkos::deep_copy(this->cell_neighbor_idx, cell_neighbor_idx_h); + Kokkos::deep_copy(this->cell_neighbor_blk, cell_neighbor_blk_h); + Kokkos::deep_copy(this->lsq_pseudoinv, lsq_pseudoinv_h); + Kokkos::deep_copy(this->lsq_moments, lsq_moments_h); + Kokkos::deep_copy(this->p_coeff, p_coeff_h); + // Test function recon_lsq_cell_q_svd<TypeParam>( this->p_cc.data(), this->cell_neighbor_idx.data(), @@ -749,9 +990,11 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + // Copy results back to host + Kokkos::deep_copy(p_coeff_h, this->p_coeff); + // Compute reference result std::vector<TypeParam> z_d(lsq_dim_c); - std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; @@ -760,24 +1003,27 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { for (int jk = this->slev; jk < this->elev; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { for (int i = 0; i < lsq_dim_c; ++i) { - z_d[i] = this->p_cc[p_cc_at( - 
this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; + z_d[i] = p_cc_h[p_cc_at( + cell_neighbor_idx_h[cell_neighbor_at(jc, jb, i)], jk, + cell_neighbor_blk_h[cell_neighbor_at(jc, jb, i)])] - + p_cc_h[p_cc_at(jc, jk, jb)]; } for (int j = 1; j < lsq_dim_unk + 1; ++j) { p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0; for (int i = 0; i < lsq_dim_c; ++i) { p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] += - this->lsq_pseudoinv[pseudoinv_at(jc, j - 1, i, jb)] * z_d[i]; + lsq_pseudoinv_h[pseudoinv_at(jc, j - 1, i, jb)] * z_d[i]; } + // p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] *= + // lsq_moments_h[moments_at(jc, jb, j - 1)]; } p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)]; - for (int j = 0; j < lsq_dim_unk; ++j) { + p_cc_h[p_cc_at(jc, jk, jb)]; + for (int j = 0; j < lsq_dim_unk + 1; ++j) { p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= + // p_result[at<lsq_dim_unk + 1, nproma>(j, jc)]; p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * - this->lsq_moments[moments_at(jc, jb, j)]; + lsq_moments_h[moments_at(jc, jb, j)]; } } } @@ -786,7 +1032,7 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { // Check result for (int j = 0; j < lsq_dim_unk + 1; ++j) { for (int jc = 0; jc < nproma; ++jc) { - EXPECT_NEAR(this->p_coeff[(p_coeff_at(j, jc, 0, 0))], + EXPECT_NEAR(p_coeff_h[(p_coeff_at(j, jc, 0, 0))], p_result[(at<lsq_dim_unk + 1, nproma>(j, jc))], 1e-5) << "For loop result fails for j = " << j << ", jc = " << jc; } @@ -811,51 +1057,73 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCell) { at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + // Create host mirror views + auto p_cc_h = Kokkos::create_mirror_view(this->p_cc); + auto cell_neighbor_idx_h = + Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto cell_neighbor_blk_h = + 
Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto lsq_qtmat_c_h = Kokkos::create_mirror_view(this->lsq_qtmat_c); + auto lsq_rmat_rdiag_c_h = Kokkos::create_mirror_view(this->lsq_rmat_rdiag_c); + auto lsq_rmat_utri_c_h = Kokkos::create_mirror_view(this->lsq_rmat_utri_c); + auto lsq_moments_h = Kokkos::create_mirror_view(this->lsq_moments); + auto p_coeff_h = Kokkos::create_mirror_view(this->p_coeff); + // Initialization for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + p_cc_h[p_cc_at(i, 0, 0)] = static_cast<TypeParam>(i + 1); - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + cell_neighbor_blk_h[cell_neighbor_at(i, 0, 0)] = 0; for (int j = 1; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, j)] = i; + cell_neighbor_blk_h[cell_neighbor_at(i, 0, j)] = 0; } for (int j = 0; j < lsq_dim_c; ++j) { - this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; - this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.9; - this->lsq_qtmat_c[qtmat_at(i, 2, j, 0)] = 0.8; - this->lsq_qtmat_c[qtmat_at(i, 3, j, 0)] = 0.7; - this->lsq_qtmat_c[qtmat_at(i, 4, j, 0)] = 0.6; - this->lsq_qtmat_c[qtmat_at(i, 5, j, 0)] = 0.5; - this->lsq_qtmat_c[qtmat_at(i, 6, j, 0)] = 0.4; - this->lsq_qtmat_c[qtmat_at(i, 7, j, 0)] = 0.3; - this->lsq_qtmat_c[qtmat_at(i, 8, j, 0)] = 0.2; + lsq_qtmat_c_h[qtmat_at(i, 0, j, 0)] = static_cast<TypeParam>(1.0); + lsq_qtmat_c_h[qtmat_at(i, 1, j, 0)] = static_cast<TypeParam>(0.9); + lsq_qtmat_c_h[qtmat_at(i, 2, j, 0)] = static_cast<TypeParam>(0.8); + lsq_qtmat_c_h[qtmat_at(i, 3, j, 0)] = static_cast<TypeParam>(0.7); + lsq_qtmat_c_h[qtmat_at(i, 4, j, 0)] = static_cast<TypeParam>(0.6); + lsq_qtmat_c_h[qtmat_at(i, 5, j, 0)] = static_cast<TypeParam>(0.5); + 
lsq_qtmat_c_h[qtmat_at(i, 6, j, 0)] = static_cast<TypeParam>(0.4); + lsq_qtmat_c_h[qtmat_at(i, 7, j, 0)] = static_cast<TypeParam>(0.3); + lsq_qtmat_c_h[qtmat_at(i, 8, j, 0)] = static_cast<TypeParam>(0.2); } for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + p_coeff_h[p_coeff_at(j, i, 0, 0)] = static_cast<TypeParam>(0.0); } for (int j = 0; j < lsq_dim_unk; ++j) { - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = 2.0; + lsq_rmat_rdiag_c_h[rmat_rdiag_at(i, j, 0)] = static_cast<TypeParam>(2.0); } for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { - this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = 1.0; + lsq_rmat_utri_c_h[rmat_utri_at(i, j, 0)] = static_cast<TypeParam>(1.0); } - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - this->lsq_moments[moments_at(i, 0, 2)] = 0.4; - this->lsq_moments[moments_at(i, 0, 3)] = 0.5; - this->lsq_moments[moments_at(i, 0, 4)] = 0.6; - this->lsq_moments[moments_at(i, 0, 5)] = 0.7; - this->lsq_moments[moments_at(i, 0, 6)] = 0.8; - this->lsq_moments[moments_at(i, 0, 7)] = 0.9; - this->lsq_moments[moments_at(i, 0, 8)] = 1.0; + lsq_moments_h[moments_at(i, 0, 0)] = static_cast<TypeParam>(0.2); + lsq_moments_h[moments_at(i, 0, 1)] = static_cast<TypeParam>(0.3); + lsq_moments_h[moments_at(i, 0, 2)] = static_cast<TypeParam>(0.4); + lsq_moments_h[moments_at(i, 0, 3)] = static_cast<TypeParam>(0.5); + lsq_moments_h[moments_at(i, 0, 4)] = static_cast<TypeParam>(0.6); + lsq_moments_h[moments_at(i, 0, 5)] = static_cast<TypeParam>(0.7); + lsq_moments_h[moments_at(i, 0, 6)] = static_cast<TypeParam>(0.8); + lsq_moments_h[moments_at(i, 0, 7)] = static_cast<TypeParam>(0.9); + lsq_moments_h[moments_at(i, 0, 8)] = static_cast<TypeParam>(1.0); } + // Copy to device + Kokkos::deep_copy(this->p_cc, p_cc_h); + Kokkos::deep_copy(this->cell_neighbor_idx, cell_neighbor_idx_h); + Kokkos::deep_copy(this->cell_neighbor_blk, cell_neighbor_blk_h); + 
Kokkos::deep_copy(this->lsq_qtmat_c, lsq_qtmat_c_h); + Kokkos::deep_copy(this->lsq_rmat_rdiag_c, lsq_rmat_rdiag_c_h); + Kokkos::deep_copy(this->lsq_rmat_utri_c, lsq_rmat_utri_c_h); + Kokkos::deep_copy(this->lsq_moments, lsq_moments_h); + Kokkos::deep_copy(this->p_coeff, p_coeff_h); + // Test function recon_lsq_cell_c<TypeParam>( this->p_cc.data(), this->cell_neighbor_idx.data(), @@ -867,37 +1135,40 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCell) { this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + // Copy results back to host + Kokkos::deep_copy(p_coeff_h, this->p_coeff); + // Check result EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - 0.28, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + static_cast<TypeParam>(0.28), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 0.4, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + static_cast<TypeParam>(0.4), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - -0.2, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + static_cast<TypeParam>(-0.2), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], - 0.4, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + static_cast<TypeParam>(0.4), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], - -0.2, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + static_cast<TypeParam>(-0.2), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], - 0.4, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + static_cast<TypeParam>(0.4), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], - -0.2, 
1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], + static_cast<TypeParam>(-0.2), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], - 0.4, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], + static_cast<TypeParam>(0.4), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], - -0.2, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], + static_cast<TypeParam>(-0.2), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], - 0.4, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], + static_cast<TypeParam>(0.4), 1e-6); } TYPED_TEST(HorizontalReconCubicTest, TestLsqCellRandom) { @@ -916,6 +1187,17 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellRandom) { at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + auto p_cc_h = Kokkos::create_mirror_view(this->p_cc); + auto cell_neighbor_idx_h = + Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto cell_neighbor_blk_h = + Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto lsq_qtmat_c_h = Kokkos::create_mirror_view(this->lsq_qtmat_c); + auto lsq_rmat_rdiag_c_h = Kokkos::create_mirror_view(this->lsq_rmat_rdiag_c); + auto lsq_rmat_utri_c_h = Kokkos::create_mirror_view(this->lsq_rmat_utri_c); + auto lsq_moments_h = Kokkos::create_mirror_view(this->lsq_moments); + auto p_coeff_h = Kokkos::create_mirror_view(this->p_coeff); + std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution<int> int_distrib(0, nproma - 1); @@ -923,29 +1205,39 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellRandom) { // Initialization for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + p_cc_h[p_cc_at(i, 0, 0)] = real_distrib(gen); for (int j = 0; j < lsq_dim_c; ++j) { - 
this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + cell_neighbor_blk_h[cell_neighbor_at(i, 0, j)] = 0; } for (int j = 0; j < lsq_dim_unk; ++j) { for (int k = 0; k < lsq_dim_c; ++k) { - this->lsq_qtmat_c[qtmat_at(i, j, k, 0)] = real_distrib(gen); + lsq_qtmat_c_h[qtmat_at(i, j, k, 0)] = real_distrib(gen); } - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = real_distrib(gen); - this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + lsq_rmat_rdiag_c_h[rmat_rdiag_at(i, j, 0)] = real_distrib(gen); + lsq_moments_h[moments_at(i, 0, j)] = real_distrib(gen); } for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { - this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = real_distrib(gen); + lsq_rmat_utri_c_h[rmat_utri_at(i, j, 0)] = real_distrib(gen); } for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + p_coeff_h[p_coeff_at(j, i, 0, 0)] = static_cast<TypeParam>(0.0); } } + // Copy to device + Kokkos::deep_copy(this->p_cc, p_cc_h); + Kokkos::deep_copy(this->cell_neighbor_idx, cell_neighbor_idx_h); + Kokkos::deep_copy(this->cell_neighbor_blk, cell_neighbor_blk_h); + Kokkos::deep_copy(this->lsq_qtmat_c, lsq_qtmat_c_h); + Kokkos::deep_copy(this->lsq_rmat_rdiag_c, lsq_rmat_rdiag_c_h); + Kokkos::deep_copy(this->lsq_rmat_utri_c, lsq_rmat_utri_c_h); + Kokkos::deep_copy(this->lsq_moments, lsq_moments_h); + Kokkos::deep_copy(this->p_coeff, p_coeff_h); + // Test function recon_lsq_cell_c<TypeParam>( this->p_cc.data(), this->cell_neighbor_idx.data(), @@ -957,6 +1249,9 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellRandom) { this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + // copy results back to host + Kokkos::deep_copy(p_coeff_h, this->p_coeff); + // Compute reference result std::vector<TypeParam> z_d(lsq_dim_c); 
std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); @@ -968,16 +1263,15 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellRandom) { for (int jk = this->slev; jk < this->elev; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { for (int i = 0; i < lsq_dim_c; ++i) { - z_d[i] = this->p_cc[p_cc_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; + z_d[i] = p_cc_h[p_cc_at( + cell_neighbor_idx_h[cell_neighbor_at(jc, jb, i)], jk, + cell_neighbor_blk_h[cell_neighbor_at(jc, jb, i)])] - + p_cc_h[p_cc_at(jc, jk, jb)]; } for (int j = 0; j < lsq_dim_unk; ++j) { z_qt_times_d[j] = 0.0; for (int i = 0; i < lsq_dim_c; ++i) { - z_qt_times_d[j] += - this->lsq_qtmat_c[qtmat_at(jc, j, i, jb)] * z_d[i]; + z_qt_times_d[j] += lsq_qtmat_c_h[qtmat_at(jc, j, i, jb)] * z_d[i]; } } int utri_id = 0; @@ -985,29 +1279,29 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellRandom) { p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = z_qt_times_d[j - 1]; for (int k = j + 1; k <= lsq_dim_unk; ++k) { p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] -= - this->lsq_rmat_utri_c[rmat_utri_at(jc, utri_id++, jb)] * + lsq_rmat_utri_c_h[rmat_utri_at(jc, utri_id++, jb)] * p_result[at<lsq_dim_unk + 1, nproma>(k, jc)]; } p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] *= - this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, j - 1, jb)]; + lsq_rmat_rdiag_c_h[rmat_rdiag_at(jc, j - 1, jb)]; } p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)]; + p_cc_h[p_cc_at(jc, jk, jb)]; for (int j = 0; j < lsq_dim_unk; ++j) { p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * - this->lsq_moments[moments_at(jc, jb, j)]; + lsq_moments_h[moments_at(jc, jb, j)]; } } } } // Check result - for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int j = 0; j < lsq_dim_unk + 1; ++j) { for (int jc = 0; jc < nproma; ++jc) { - EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], 
- p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) - << "For loop result fails for i = " << i << ", jc = " << jc; + EXPECT_NEAR(p_coeff_h[(p_coeff_at(j, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(j, jc))], 1e-5) + << "For loop result fails for j = " << j << ", jc = " << jc; } } } @@ -1025,43 +1319,61 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVD) { const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + // Create host mirror views + auto p_cc_h = Kokkos::create_mirror_view(this->p_cc); + auto cell_neighbor_idx_h = + Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto cell_neighbor_blk_h = + Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto lsq_pseudoinv_h = Kokkos::create_mirror_view(this->lsq_pseudoinv); + auto lsq_moments_h = Kokkos::create_mirror_view(this->lsq_moments); + auto p_coeff_h = Kokkos::create_mirror_view(this->p_coeff); + // Initialization for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + p_cc_h[p_cc_at(i, 0, 0)] = static_cast<TypeParam>(i + 1); - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + cell_neighbor_blk_h[cell_neighbor_at(i, 0, 0)] = 0; for (int j = 1; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, j)] = i; + cell_neighbor_blk_h[cell_neighbor_at(i, 0, j)] = 0; } for (int j = 0; j < lsq_dim_c; ++j) { - this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; - this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.9; - this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.8; - this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; - this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 0.6; - this->lsq_pseudoinv[pseudoinv_at(i, 5, j, 0)] 
= 0.5; - this->lsq_pseudoinv[pseudoinv_at(i, 6, j, 0)] = 0.4; - this->lsq_pseudoinv[pseudoinv_at(i, 7, j, 0)] = 0.3; - this->lsq_pseudoinv[pseudoinv_at(i, 8, j, 0)] = 0.2; + lsq_pseudoinv_h[pseudoinv_at(i, 0, j, 0)] = static_cast<TypeParam>(1.0); + lsq_pseudoinv_h[pseudoinv_at(i, 1, j, 0)] = static_cast<TypeParam>(0.9); + lsq_pseudoinv_h[pseudoinv_at(i, 2, j, 0)] = static_cast<TypeParam>(0.8); + lsq_pseudoinv_h[pseudoinv_at(i, 3, j, 0)] = static_cast<TypeParam>(0.7); + lsq_pseudoinv_h[pseudoinv_at(i, 4, j, 0)] = static_cast<TypeParam>(0.6); + lsq_pseudoinv_h[pseudoinv_at(i, 5, j, 0)] = static_cast<TypeParam>(0.5); + lsq_pseudoinv_h[pseudoinv_at(i, 6, j, 0)] = static_cast<TypeParam>(0.4); + lsq_pseudoinv_h[pseudoinv_at(i, 7, j, 0)] = static_cast<TypeParam>(0.3); + lsq_pseudoinv_h[pseudoinv_at(i, 8, j, 0)] = static_cast<TypeParam>(0.2); } for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + p_coeff_h[p_coeff_at(j, i, 0, 0)] = static_cast<TypeParam>(0.0); } - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - this->lsq_moments[moments_at(i, 0, 2)] = 0.4; - this->lsq_moments[moments_at(i, 0, 3)] = 0.5; - this->lsq_moments[moments_at(i, 0, 4)] = 0.6; - this->lsq_moments[moments_at(i, 0, 5)] = 0.7; - this->lsq_moments[moments_at(i, 0, 6)] = 0.8; - this->lsq_moments[moments_at(i, 0, 7)] = 0.9; - this->lsq_moments[moments_at(i, 0, 8)] = 1.0; + lsq_moments_h[moments_at(i, 0, 0)] = static_cast<TypeParam>(0.2); + lsq_moments_h[moments_at(i, 0, 1)] = static_cast<TypeParam>(0.3); + lsq_moments_h[moments_at(i, 0, 2)] = static_cast<TypeParam>(0.4); + lsq_moments_h[moments_at(i, 0, 3)] = static_cast<TypeParam>(0.5); + lsq_moments_h[moments_at(i, 0, 4)] = static_cast<TypeParam>(0.6); + lsq_moments_h[moments_at(i, 0, 5)] = static_cast<TypeParam>(0.7); + lsq_moments_h[moments_at(i, 0, 6)] = static_cast<TypeParam>(0.8); + lsq_moments_h[moments_at(i, 0, 7)] = static_cast<TypeParam>(0.9); + 
lsq_moments_h[moments_at(i, 0, 8)] = static_cast<TypeParam>(1.0); } + // Copy to device + Kokkos::deep_copy(this->p_cc, p_cc_h); + Kokkos::deep_copy(this->cell_neighbor_idx, cell_neighbor_idx_h); + Kokkos::deep_copy(this->cell_neighbor_blk, cell_neighbor_blk_h); + Kokkos::deep_copy(this->lsq_pseudoinv, lsq_pseudoinv_h); + Kokkos::deep_copy(this->lsq_moments, lsq_moments_h); + Kokkos::deep_copy(this->p_coeff, p_coeff_h); + // Test function recon_lsq_cell_c_svd<TypeParam>( this->p_cc.data(), this->cell_neighbor_idx.data(), @@ -1072,37 +1384,40 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVD) { this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + // Copy results back to host + Kokkos::deep_copy(p_coeff_h, this->p_coeff); + // Check result EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - -1.64, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + static_cast<TypeParam>(-1.64), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 1.0, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + static_cast<TypeParam>(1.0), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - 0.9, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + static_cast<TypeParam>(0.9), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], - 0.8, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + static_cast<TypeParam>(0.8), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], - 0.7, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + static_cast<TypeParam>(0.7), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], - 0.6, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 
0, 0, 0))], + static_cast<TypeParam>(0.6), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], - 0.5, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], + static_cast<TypeParam>(0.5), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], - 0.4, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], + static_cast<TypeParam>(0.4), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], - 0.3, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], + static_cast<TypeParam>(0.3), 1e-6); EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], - 0.2, 1e-6); + p_coeff_h[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], + static_cast<TypeParam>(0.2), 1e-6); } TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { @@ -1118,6 +1433,16 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + // Create host mirror views + auto p_cc_h = Kokkos::create_mirror_view(this->p_cc); + auto cell_neighbor_idx_h = + Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto cell_neighbor_blk_h = + Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto lsq_pseudoinv_h = Kokkos::create_mirror_view(this->lsq_pseudoinv); + auto lsq_moments_h = Kokkos::create_mirror_view(this->lsq_moments); + auto p_coeff_h = Kokkos::create_mirror_view(this->p_coeff); + std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution<int> int_distrib(0, nproma - 1); @@ -1125,25 +1450,33 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { // Initialization for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + p_cc_h[p_cc_at(i, 0, 0)] = real_distrib(gen); for (int j = 0; j < 
lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + cell_neighbor_idx_h[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + cell_neighbor_blk_h[cell_neighbor_at(i, 0, j)] = 0; } for (int j = 0; j < lsq_dim_unk; ++j) { for (int k = 0; k < lsq_dim_c; ++k) { - this->lsq_pseudoinv[pseudoinv_at(i, j, k, 0)] = real_distrib(gen); + lsq_pseudoinv_h[pseudoinv_at(i, j, k, 0)] = real_distrib(gen); } - this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + lsq_moments_h[moments_at(i, 0, j)] = real_distrib(gen); } for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + p_coeff_h[p_coeff_at(j, i, 0, 0)] = static_cast<TypeParam>(0.0); } } + // Copy to device + Kokkos::deep_copy(this->p_cc, p_cc_h); + Kokkos::deep_copy(this->cell_neighbor_idx, cell_neighbor_idx_h); + Kokkos::deep_copy(this->cell_neighbor_blk, cell_neighbor_blk_h); + Kokkos::deep_copy(this->lsq_pseudoinv, lsq_pseudoinv_h); + Kokkos::deep_copy(this->lsq_moments, lsq_moments_h); + Kokkos::deep_copy(this->p_coeff, p_coeff_h); + // Test function recon_lsq_cell_c_svd<TypeParam>( this->p_cc.data(), this->cell_neighbor_idx.data(), @@ -1154,6 +1487,9 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + // Copy results back to host + Kokkos::deep_copy(p_coeff_h, this->p_coeff); + // Compute reference result std::vector<TypeParam> z_d(lsq_dim_c); std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); @@ -1166,32 +1502,33 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { for (int jk = this->slev; jk < this->elev; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { for (int i = 0; i < lsq_dim_c; ++i) { - z_d[i] = this->p_cc[p_cc_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - - this->p_cc[p_cc_at(jc, 
jk, jb)]; + z_d[i] = p_cc_h[p_cc_at( + cell_neighbor_idx_h[cell_neighbor_at(jc, jb, i)], jk, + cell_neighbor_blk_h[cell_neighbor_at(jc, jb, i)])] - + p_cc_h[p_cc_at(jc, jk, jb)]; } for (int j = 1; j < lsq_dim_unk + 1; ++j) { p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0; for (int i = 0; i < lsq_dim_c; ++i) { p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] += - this->lsq_pseudoinv[pseudoinv_at(jc, j - 1, i, jb)] * z_d[i]; + lsq_pseudoinv_h[pseudoinv_at(jc, j - 1, i, jb)] * z_d[i]; } } p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)]; + p_cc_h[p_cc_at(jc, jk, jb)]; for (int j = 0; j < lsq_dim_unk; ++j) { p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * - this->lsq_moments[moments_at(jc, jb, j)]; + lsq_moments_h[moments_at(jc, jb, j)]; } } } } + // Check result for (int i = 0; i < lsq_dim_unk + 1; ++i) { for (int jc = 0; jc < nproma; ++jc) { - EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + EXPECT_NEAR(p_coeff_h[(p_coeff_at(i, jc, 0, 0))], p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) << "For loop result fails for i = " << i << ", jc = " << jc; } -- GitLab From 97577dd3de83fa953eae5cdc4704142241d1f370 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Thu, 27 Mar 2025 11:58:57 +0100 Subject: [PATCH 07/34] modified the tests in horizontal_rot --- test/c/CMakeLists.txt | 2 +- test/c/test_horizontal_rot.cpp | 301 ++++++++++++++++++++------------- 2 files changed, 186 insertions(+), 117 deletions(-) diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index f4c5e27..2dd32f4 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -33,8 +33,8 @@ endif() set(SOURCES main.cpp test_horizontal_div.cpp - # test_horizontal_rot.cpp test_horizontal_recon.cpp + test_horizontal_rot.cpp # test_tdma_solver.cpp # test_interpolation_vector.cpp # test_intp_rbf.cpp diff --git a/test/c/test_horizontal_rot.cpp b/test/c/test_horizontal_rot.cpp index 
68e8024..2c8fc46 100644 --- a/test/c/test_horizontal_rot.cpp +++ b/test/c/test_horizontal_rot.cpp @@ -14,8 +14,8 @@ #include <vector> #include <Kokkos_Core.hpp> -#include <gtest/gtest.h> #include <dim_helper.hpp> +#include <gtest/gtest.h> #include <horizontal/mo_lib_divrot.hpp> #include <support/mo_lib_loopindices.hpp> @@ -38,25 +38,31 @@ protected: bool lacc = false; // Not using ACC-specific behavior. bool acc_async = false; // Not using ACC-specific behavior. - std::vector<ValueType> vec_e; - std::vector<int> vert_edge_idx; - std::vector<int> vert_edge_blk; - std::vector<ValueType> geofac_rot; - std::vector<ValueType> rot_vec; - std::vector<ValueType> f4din; - std::vector<ValueType> f4dout; - - HorizontalRotVertexTest() { + using exec_space = Kokkos::DefaultExecutionSpace; + using memory_space = exec_space::memory_space; + + // Views for the test data. All the data is assigned as one-dimensional arrays + Kokkos::View<ValueType *, memory_space> vec_e; + Kokkos::View<int *, memory_space> vert_edge_idx; + Kokkos::View<int *, memory_space> vert_edge_blk; + Kokkos::View<ValueType *, memory_space> geofac_rot; + Kokkos::View<ValueType *, memory_space> rot_vec; + Kokkos::View<ValueType *, memory_space> f4din; + Kokkos::View<ValueType *, memory_space> f4dout; + + HorizontalRotVertexTest() + : vec_e("vec_e", dim_combine(nproma, nlev, nblks_e)), + vert_edge_idx("vert_edge_idx", dim_combine(nproma, nblks_v, 6)), + vert_edge_blk("vert_edge_blk", dim_combine(nproma, nblks_v, 6)), + geofac_rot("geofac_rot", dim_combine(nproma, 6, nblks_v)), + rot_vec("rot_vec", dim_combine(nproma, nlev, nblks_v)), + f4din("f4din", dim_combine(nproma, nlev, nblks_e, dim4d)), + f4dout("f4dout", dim_combine(nproma, nlev, nblks_v, dim4d)) { + + // We keep slev and elev as std::vector since they are small and used only + // on the host. slev.resize(dim4d, 0); elev.resize(dim4d, nlev); // Full vertical range (0 .. 
nlev-1) - - vec_e.resize(dim_combine(nproma, nlev, nblks_e)); - vert_edge_idx.resize(dim_combine(nproma, nblks_v, 6)); - vert_edge_blk.resize(dim_combine(nproma, nblks_v, 6)); - geofac_rot.resize(dim_combine(nproma, 6, nblks_v)); - rot_vec.resize(dim_combine(nproma, nlev, nblks_v)); - f4din.resize(dim_combine(nproma, nlev, nblks_e, dim4d)); - f4dout.resize(dim_combine(nproma, nlev, nblks_v, dim4d)); } }; @@ -76,33 +82,46 @@ TYPED_TEST(HorizontalRotVertexTest, TestRotVertexAtmosSpecific) { const auto &geofac_rot_at = at<nproma, 6, nblks_v>; const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + // Create host mirror views + auto vec_e_h = Kokkos::create_mirror_view(this->vec_e); + auto vert_edge_idx_h = Kokkos::create_mirror_view(this->vert_edge_idx); + auto vert_edge_blk_h = Kokkos::create_mirror_view(this->vert_edge_blk); + auto geofac_rot_h = Kokkos::create_mirror_view(this->geofac_rot); + auto rot_vec_h = Kokkos::create_mirror_view(this->rot_vec); + // Initialization with specific values for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + vec_e_h(vec_e_at(i, k, 0)) = (i + 1) * (k + 1); // Simple pattern } // Set edge indices to point to specific edges for (int j = 0; j < 6; ++j) { - this->vert_edge_idx[vert_edge_at(i, 0, j)] = (i + j) % nproma; + vert_edge_idx_h(vert_edge_at(i, 0, j)) = (i + j) % nproma; // All edges are in the same block for this test - this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; + vert_edge_blk_h(vert_edge_at(i, 0, j)) = 0; } - // Geometric factors for rotation - this->geofac_rot[geofac_rot_at(i, 0, 0)] = 0.3; - this->geofac_rot[geofac_rot_at(i, 1, 0)] = 0.2; - this->geofac_rot[geofac_rot_at(i, 2, 0)] = 0.1; - this->geofac_rot[geofac_rot_at(i, 3, 0)] = 0.2; - this->geofac_rot[geofac_rot_at(i, 4, 0)] = 0.1; - this->geofac_rot[geofac_rot_at(i, 5, 0)] = 0.1; + geofac_rot_h(geofac_rot_at(i, 0, 0)) = 0.3; + geofac_rot_h(geofac_rot_at(i, 1, 0)) = 0.2; + 
geofac_rot_h(geofac_rot_at(i, 2, 0)) = 0.1; + geofac_rot_h(geofac_rot_at(i, 3, 0)) = 0.2; + geofac_rot_h(geofac_rot_at(i, 4, 0)) = 0.1; + geofac_rot_h(geofac_rot_at(i, 5, 0)) = 0.1; // Initialize rot_vec to zero for (int k = 0; k < nlev; ++k) { - this->rot_vec[rot_vec_at(i, k, 0)] = 0.0; + rot_vec_h(rot_vec_at(i, k, 0)) = 0.0; } } + // Copy initialized data to device + Kokkos::deep_copy(this->vec_e, vec_e_h); + Kokkos::deep_copy(this->vert_edge_idx, vert_edge_idx_h); + Kokkos::deep_copy(this->vert_edge_blk, vert_edge_blk_h); + Kokkos::deep_copy(this->geofac_rot, geofac_rot_h); + Kokkos::deep_copy(this->rot_vec, rot_vec_h); + // Call the rot_vertex_atmos function rot_vertex_atmos<TypeParam>( this->vec_e.data(), this->vert_edge_idx.data(), @@ -111,13 +130,22 @@ TYPED_TEST(HorizontalRotVertexTest, TestRotVertexAtmosSpecific) { this->slev[0], this->elev[0], this->nproma, this->lacc, this->nlev, this->nblks_e, this->nblks_v); + // Copy results back to host for verification + Kokkos::deep_copy(rot_vec_h, this->rot_vec); + // Expected values based on the initialization pattern - EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 0, 0)], 1.7, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 1, 0)], 3.4, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 0, 0)], 2.1, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 1, 0)], 4.2, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 0, 0)], 2.2, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 1, 0)], 4.4, 1e-6); + EXPECT_NEAR(rot_vec_h[rot_vec_at(0, 0, 0)], static_cast<TypeParam>(1.7), + 1e-6); + EXPECT_NEAR(rot_vec_h[rot_vec_at(0, 1, 0)], static_cast<TypeParam>(3.4), + 1e-6); + EXPECT_NEAR(rot_vec_h[rot_vec_at(1, 0, 0)], static_cast<TypeParam>(2.1), + 1e-6); + EXPECT_NEAR(rot_vec_h[rot_vec_at(1, 1, 0)], static_cast<TypeParam>(4.2), + 1e-6); + EXPECT_NEAR(rot_vec_h[rot_vec_at(2, 0, 0)], static_cast<TypeParam>(2.2), + 1e-6); + EXPECT_NEAR(rot_vec_h[rot_vec_at(2, 1, 0)], static_cast<TypeParam>(4.4), + 1e-6); } 
TYPED_TEST(HorizontalRotVertexTest, TestRotVertexAtmosRandom) { @@ -131,6 +159,13 @@ TYPED_TEST(HorizontalRotVertexTest, TestRotVertexAtmosRandom) { const auto &geofac_rot_at = at<nproma, 6, nblks_v>; const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + // Create host mirror views + auto vec_e_h = Kokkos::create_mirror_view(this->vec_e); + auto vert_edge_idx_h = Kokkos::create_mirror_view(this->vert_edge_idx); + auto vert_edge_blk_h = Kokkos::create_mirror_view(this->vert_edge_blk); + auto geofac_rot_h = Kokkos::create_mirror_view(this->geofac_rot); + auto rot_vec_h = Kokkos::create_mirror_view(this->rot_vec); + // Set up random number generators std::random_device rd; std::mt19937 gen(rd()); @@ -140,27 +175,34 @@ TYPED_TEST(HorizontalRotVertexTest, TestRotVertexAtmosRandom) { // Initialization with random values for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + vec_e_h(vec_e_at(i, k, 0)) = real_distrib(gen); } // Set random edge indices for (int j = 0; j < 6; ++j) { - this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen); - this->vert_edge_blk[vert_edge_at(i, 0, j)] = + vert_edge_idx_h(vert_edge_at(i, 0, j)) = int_distrib(gen); + vert_edge_blk_h(vert_edge_at(i, 0, j)) = 0; // Keep in same block for simplicity } // Random geometric factors for (int j = 0; j < 6; ++j) { - this->geofac_rot[geofac_rot_at(i, j, 0)] = real_distrib(gen); + geofac_rot_h(geofac_rot_at(i, j, 0)) = real_distrib(gen); } - // Initialize rot_vec to random values + // Initialize rot_vec to zero for (int k = 0; k < nlev; ++k) { - this->rot_vec[rot_vec_at(i, k, 0)] = real_distrib(gen); + rot_vec_h(rot_vec_at(i, k, 0)) = static_cast<TypeParam>(0.0); } } + // Copy initialized data to device + Kokkos::deep_copy(this->vec_e, vec_e_h); + Kokkos::deep_copy(this->vert_edge_idx, vert_edge_idx_h); + Kokkos::deep_copy(this->vert_edge_blk, vert_edge_blk_h); + Kokkos::deep_copy(this->geofac_rot, geofac_rot_h); + 
Kokkos::deep_copy(this->rot_vec, rot_vec_h); + // Call the rot_vertex_atmos function rot_vertex_atmos<TypeParam>( this->vec_e.data(), this->vert_edge_idx.data(), @@ -169,6 +211,9 @@ TYPED_TEST(HorizontalRotVertexTest, TestRotVertexAtmosRandom) { this->slev[0], this->elev[0], this->nproma, this->lacc, this->nlev, this->nblks_e, this->nblks_v); + // Copy results back to host for verification + Kokkos::deep_copy(rot_vec_h, this->rot_vec); + // Calculate reference values separately and verify results std::vector<TypeParam> ref_rot_vec(nproma * nlev * nblks_v, 0.0); @@ -180,30 +225,24 @@ TYPED_TEST(HorizontalRotVertexTest, TestRotVertexAtmosRandom) { for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jv = i_startidx; jv < i_endidx; ++jv) { ref_rot_vec[rot_vec_at(jv, jk, jb)] = - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * - this->geofac_rot[geofac_rot_at(jv, 0, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * - this->geofac_rot[geofac_rot_at(jv, 1, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * - this->geofac_rot[geofac_rot_at(jv, 2, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * - this->geofac_rot[geofac_rot_at(jv, 3, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * - this->geofac_rot[geofac_rot_at(jv, 4, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * - this->geofac_rot[geofac_rot_at(jv, 5, jb)]; + vec_e_h[vec_e_at(vert_edge_idx_h[vert_edge_at(jv, jb, 0)], jk, + vert_edge_blk_h[vert_edge_at(jv, jb, 0)])] * + geofac_rot_h[geofac_rot_at(jv, 0, jb)] + + 
vec_e_h[vec_e_at(vert_edge_idx_h[vert_edge_at(jv, jb, 1)], jk, + vert_edge_blk_h[vert_edge_at(jv, jb, 1)])] * + geofac_rot_h[geofac_rot_at(jv, 1, jb)] + + vec_e_h[vec_e_at(vert_edge_idx_h[vert_edge_at(jv, jb, 2)], jk, + vert_edge_blk_h[vert_edge_at(jv, jb, 2)])] * + geofac_rot_h[geofac_rot_at(jv, 2, jb)] + + vec_e_h[vec_e_at(vert_edge_idx_h[vert_edge_at(jv, jb, 3)], jk, + vert_edge_blk_h[vert_edge_at(jv, jb, 3)])] * + geofac_rot_h[geofac_rot_at(jv, 3, jb)] + + vec_e_h[vec_e_at(vert_edge_idx_h[vert_edge_at(jv, jb, 4)], jk, + vert_edge_blk_h[vert_edge_at(jv, jb, 4)])] * + geofac_rot_h[geofac_rot_at(jv, 4, jb)] + + vec_e_h[vec_e_at(vert_edge_idx_h[vert_edge_at(jv, jb, 5)], jk, + vert_edge_blk_h[vert_edge_at(jv, jb, 5)])] * + geofac_rot_h[geofac_rot_at(jv, 5, jb)]; } } } @@ -211,7 +250,7 @@ TYPED_TEST(HorizontalRotVertexTest, TestRotVertexAtmosRandom) { // Verify results for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->rot_vec[rot_vec_at(i, k, 0)], + EXPECT_NEAR(rot_vec_h[rot_vec_at(i, k, 0)], ref_rot_vec[rot_vec_at(i, k, 0)], 1e-5) << "Results differ at i=" << i << ", k=" << k; } @@ -231,33 +270,47 @@ TYPED_TEST(HorizontalRotVertexTest, TestRotVertexRISpecific) { const auto &geofac_rot_at = at<nproma, 6, nblks_v>; const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + // Create host mirror views + auto vec_e_h = Kokkos::create_mirror_view(this->vec_e); + auto vert_edge_idx_h = Kokkos::create_mirror_view(this->vert_edge_idx); + auto vert_edge_blk_h = Kokkos::create_mirror_view(this->vert_edge_blk); + auto geofac_rot_h = Kokkos::create_mirror_view(this->geofac_rot); + auto rot_vec_h = Kokkos::create_mirror_view(this->rot_vec); + // Initialization with specific values for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + vec_e_h(vec_e_at(i, k, 0)) = (i + 1) * (k + 1); // Simple pattern } // Set edge indices to point to specific edges for (int j = 
0; j < 6; ++j) { - this->vert_edge_idx[vert_edge_at(i, 0, j)] = (i + j) % nproma; + vert_edge_idx_h(vert_edge_at(i, 0, j)) = (i + j) % nproma; // All edges are in the same block for this test - this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; + vert_edge_blk_h(vert_edge_at(i, 0, j)) = 0; } // Geometric factors for rotation - this->geofac_rot[geofac_rot_at(i, 0, 0)] = 0.3; - this->geofac_rot[geofac_rot_at(i, 1, 0)] = 0.2; - this->geofac_rot[geofac_rot_at(i, 2, 0)] = 0.1; - this->geofac_rot[geofac_rot_at(i, 3, 0)] = 0.2; - this->geofac_rot[geofac_rot_at(i, 4, 0)] = 0.1; - this->geofac_rot[geofac_rot_at(i, 5, 0)] = 0.1; + geofac_rot_h(geofac_rot_at(i, 0, 0)) = 0.3; + geofac_rot_h(geofac_rot_at(i, 1, 0)) = 0.2; + geofac_rot_h(geofac_rot_at(i, 2, 0)) = 0.1; + geofac_rot_h(geofac_rot_at(i, 3, 0)) = 0.2; + geofac_rot_h(geofac_rot_at(i, 4, 0)) = 0.1; + geofac_rot_h(geofac_rot_at(i, 5, 0)) = 0.1; // Initialize rot_vec to zero for (int k = 0; k < nlev; ++k) { - this->rot_vec[rot_vec_at(i, k, 0)] = 0.0; + rot_vec_h(rot_vec_at(i, k, 0)) = 0.0; } } + // Copy initialized data to device + Kokkos::deep_copy(this->vec_e, vec_e_h); + Kokkos::deep_copy(this->vert_edge_idx, vert_edge_idx_h); + Kokkos::deep_copy(this->vert_edge_blk, vert_edge_blk_h); + Kokkos::deep_copy(this->geofac_rot, geofac_rot_h); + Kokkos::deep_copy(this->rot_vec, rot_vec_h); + // Call the rot_vertex_ri function rot_vertex_ri<TypeParam>( this->vec_e.data(), this->vert_edge_idx.data(), @@ -266,13 +319,22 @@ TYPED_TEST(HorizontalRotVertexTest, TestRotVertexRISpecific) { this->slev[0], this->elev[0], this->nproma, this->lacc, this->acc_async, this->nlev, this->nblks_e, this->nblks_v); + // Copy results back to host for verification + Kokkos::deep_copy(rot_vec_h, this->rot_vec); + // Expected values based on the initialization pattern - EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 0, 0)], 1.7, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 1, 0)], 3.4, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 0, 0)], 2.1, 1e-6); 
- EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 1, 0)], 4.2, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 0, 0)], 2.2, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 1, 0)], 4.4, 1e-6); + EXPECT_NEAR(rot_vec_h[rot_vec_at(0, 0, 0)], static_cast<TypeParam>(1.7), + 1e-6); + EXPECT_NEAR(rot_vec_h[rot_vec_at(0, 1, 0)], static_cast<TypeParam>(3.4), + 1e-6); + EXPECT_NEAR(rot_vec_h[rot_vec_at(1, 0, 0)], static_cast<TypeParam>(2.1), + 1e-6); + EXPECT_NEAR(rot_vec_h[rot_vec_at(1, 1, 0)], static_cast<TypeParam>(4.2), + 1e-6); + EXPECT_NEAR(rot_vec_h[rot_vec_at(2, 0, 0)], static_cast<TypeParam>(2.2), + 1e-6); + EXPECT_NEAR(rot_vec_h[rot_vec_at(2, 1, 0)], static_cast<TypeParam>(4.4), + 1e-6); } TYPED_TEST(HorizontalRotVertexTest, TestRotVertexRIRandom) { @@ -286,6 +348,13 @@ TYPED_TEST(HorizontalRotVertexTest, TestRotVertexRIRandom) { const auto &geofac_rot_at = at<nproma, 6, nblks_v>; const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + // Create host mirror views + auto vec_e_h = Kokkos::create_mirror_view(this->vec_e); + auto vert_edge_idx_h = Kokkos::create_mirror_view(this->vert_edge_idx); + auto vert_edge_blk_h = Kokkos::create_mirror_view(this->vert_edge_blk); + auto geofac_rot_h = Kokkos::create_mirror_view(this->geofac_rot); + auto rot_vec_h = Kokkos::create_mirror_view(this->rot_vec); + // Set up random number generators std::random_device rd; std::mt19937 gen(rd()); @@ -295,27 +364,34 @@ TYPED_TEST(HorizontalRotVertexTest, TestRotVertexRIRandom) { // Initialization with random values for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + vec_e_h(vec_e_at(i, k, 0)) = real_distrib(gen); } // Set random edge indices for (int j = 0; j < 6; ++j) { - this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen); - this->vert_edge_blk[vert_edge_at(i, 0, j)] = + vert_edge_idx_h(vert_edge_at(i, 0, j)) = int_distrib(gen); + vert_edge_blk_h(vert_edge_at(i, 0, j)) = 0; // Keep in same block for simplicity } 
// Random geometric factors for (int j = 0; j < 6; ++j) { - this->geofac_rot[geofac_rot_at(i, j, 0)] = real_distrib(gen); + geofac_rot_h(geofac_rot_at(i, j, 0)) = real_distrib(gen); } - // Initialize rot_vec to random values + // Initialize rot_vec to zero for (int k = 0; k < nlev; ++k) { - this->rot_vec[rot_vec_at(i, k, 0)] = real_distrib(gen); + rot_vec_h(rot_vec_at(i, k, 0)) = static_cast<TypeParam>(0.0); } } + // Copy initialized data to device + Kokkos::deep_copy(this->vec_e, vec_e_h); + Kokkos::deep_copy(this->vert_edge_idx, vert_edge_idx_h); + Kokkos::deep_copy(this->vert_edge_blk, vert_edge_blk_h); + Kokkos::deep_copy(this->geofac_rot, geofac_rot_h); + Kokkos::deep_copy(this->rot_vec, rot_vec_h); + // Call the rot_vertex_ri function rot_vertex_ri<TypeParam>( this->vec_e.data(), this->vert_edge_idx.data(), @@ -324,8 +400,8 @@ TYPED_TEST(HorizontalRotVertexTest, TestRotVertexRIRandom) { this->slev[0], this->elev[0], this->nproma, this->lacc, this->acc_async, this->nlev, this->nblks_e, this->nblks_v); - // Ensure computation is complete for both modes - Kokkos::fence(); + // Copy results back to host for verification + Kokkos::deep_copy(rot_vec_h, this->rot_vec); // Calculate reference values separately and verify results std::vector<TypeParam> ref_rot_vec(nproma * nlev * nblks_v, 0.0); @@ -338,30 +414,24 @@ TYPED_TEST(HorizontalRotVertexTest, TestRotVertexRIRandom) { for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jv = i_startidx; jv < i_endidx; ++jv) { ref_rot_vec[rot_vec_at(jv, jk, jb)] = - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * - this->geofac_rot[geofac_rot_at(jv, 0, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * - this->geofac_rot[geofac_rot_at(jv, 1, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, - 
this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * - this->geofac_rot[geofac_rot_at(jv, 2, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * - this->geofac_rot[geofac_rot_at(jv, 3, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * - this->geofac_rot[geofac_rot_at(jv, 4, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * - this->geofac_rot[geofac_rot_at(jv, 5, jb)]; + vec_e_h[vec_e_at(vert_edge_idx_h[vert_edge_at(jv, jb, 0)], jk, + vert_edge_blk_h[vert_edge_at(jv, jb, 0)])] * + geofac_rot_h[geofac_rot_at(jv, 0, jb)] + + vec_e_h[vec_e_at(vert_edge_idx_h[vert_edge_at(jv, jb, 1)], jk, + vert_edge_blk_h[vert_edge_at(jv, jb, 1)])] * + geofac_rot_h[geofac_rot_at(jv, 1, jb)] + + vec_e_h[vec_e_at(vert_edge_idx_h[vert_edge_at(jv, jb, 2)], jk, + vert_edge_blk_h[vert_edge_at(jv, jb, 2)])] * + geofac_rot_h[geofac_rot_at(jv, 2, jb)] + + vec_e_h[vec_e_at(vert_edge_idx_h[vert_edge_at(jv, jb, 3)], jk, + vert_edge_blk_h[vert_edge_at(jv, jb, 3)])] * + geofac_rot_h[geofac_rot_at(jv, 3, jb)] + + vec_e_h[vec_e_at(vert_edge_idx_h[vert_edge_at(jv, jb, 4)], jk, + vert_edge_blk_h[vert_edge_at(jv, jb, 4)])] * + geofac_rot_h[geofac_rot_at(jv, 4, jb)] + + vec_e_h[vec_e_at(vert_edge_idx_h[vert_edge_at(jv, jb, 5)], jk, + vert_edge_blk_h[vert_edge_at(jv, jb, 5)])] * + geofac_rot_h[geofac_rot_at(jv, 5, jb)]; } } } @@ -369,10 +439,9 @@ TYPED_TEST(HorizontalRotVertexTest, TestRotVertexRIRandom) { // Verify results for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->rot_vec[rot_vec_at(i, k, 0)], + EXPECT_NEAR(rot_vec_h[rot_vec_at(i, k, 0)], ref_rot_vec[rot_vec_at(i, k, 0)], 1e-5) << "Results differ at i=" << i << ", k=" << k << ")"; } } } - -- GitLab From ec65397a9e1b0590a639ef693d0b61804bb81dca Mon Sep 17 00:00:00 2001 
From: Pradipta Samanta <samanta@dkrz.de> Date: Thu, 27 Mar 2025 11:59:57 +0100 Subject: [PATCH 08/34] formatted the file test_horizontal_div --- test/c/test_horizontal_div.cpp | 440 +++++++++++++++++++-------------- 1 file changed, 251 insertions(+), 189 deletions(-) diff --git a/test/c/test_horizontal_div.cpp b/test/c/test_horizontal_div.cpp index bc6ea9f..5904691 100644 --- a/test/c/test_horizontal_div.cpp +++ b/test/c/test_horizontal_div.cpp @@ -14,14 +14,13 @@ #include <vector> #include <Kokkos_Core.hpp> -#include <gtest/gtest.h> #include <dim_helper.hpp> +#include <gtest/gtest.h> #include <horizontal/mo_lib_divrot.hpp> #include <support/mo_lib_loopindices.hpp> /// Test class for the horizontal divergence tests. Templated for the ValueType -template <typename ValueType> -class HorizontalDivTest : public ::testing::Test { +template <typename ValueType> class HorizontalDivTest : public ::testing::Test { protected: static constexpr int nproma = 3; // inner loop length static constexpr int nlev = 2; // number of vertical levels @@ -37,26 +36,26 @@ protected: std::vector<int> elev; bool lacc = false; // Not using ACC-specific behavior. - // Here we allocate Kokkos::View objects in a memory space that is directly accessible - // from both the host and device + // Here we allocate Kokkos::View objects in a memory space that is directly + // accessible from both the host and device using exec_space = Kokkos::DefaultExecutionSpace; using memory_space = exec_space::memory_space; // Views for the test data. 
All the data is assigned as one-dimensional arrays - Kokkos::View<ValueType*, memory_space> vec_e; - Kokkos::View<int*, memory_space> cell_edge_idx; - Kokkos::View<int*, memory_space> cell_edge_blk; - Kokkos::View<ValueType*, memory_space> geofac_div; - Kokkos::View<ValueType*, memory_space> div_vec_c; - Kokkos::View<ValueType*, memory_space> f4din; - Kokkos::View<ValueType*, memory_space> f4dout; + Kokkos::View<ValueType *, memory_space> vec_e; + Kokkos::View<int *, memory_space> cell_edge_idx; + Kokkos::View<int *, memory_space> cell_edge_blk; + Kokkos::View<ValueType *, memory_space> geofac_div; + Kokkos::View<ValueType *, memory_space> div_vec_c; + Kokkos::View<ValueType *, memory_space> f4din; + Kokkos::View<ValueType *, memory_space> f4dout; // Followings are needed in HorizontalDivAvgTest - Kokkos::View<int*, memory_space> cell_neighbor_idx; - Kokkos::View<int*, memory_space> cell_neighbor_blk; - Kokkos::View<ValueType*, memory_space> avg_coeff; - Kokkos::View<ValueType*, memory_space> opt_in2; - Kokkos::View<ValueType*, memory_space> opt_out2; + Kokkos::View<int *, memory_space> cell_neighbor_idx; + Kokkos::View<int *, memory_space> cell_neighbor_blk; + Kokkos::View<ValueType *, memory_space> avg_coeff; + Kokkos::View<ValueType *, memory_space> opt_in2; + Kokkos::View<ValueType *, memory_space> opt_out2; HorizontalDivTest() : vec_e("vec_e", dim_combine(nproma, nlev, nblks_e)), @@ -70,13 +69,12 @@ protected: cell_neighbor_blk("cell_neighbor_blk", dim_combine(nproma, nblks_c, 3)), avg_coeff("avg_coeff", dim_combine(nproma, 4, nblks_c)), opt_in2("opt_in2", dim_combine(nproma, nlev, nblks_e)), - opt_out2("opt_out2", dim_combine(nproma, nlev, nblks_c)) - { + opt_out2("opt_out2", dim_combine(nproma, nlev, nblks_c)) { - // We keep slev and elev as std::vector since they are small and used only on the host. + // We keep slev and elev as std::vector since they are small and used only + // on the host. 
slev.resize(dim4d, 0); elev.resize(dim4d, nlev); // Full vertical range (0 .. nlev-1) - } }; @@ -126,8 +124,8 @@ TYPED_TEST(HorizontalDivTest, TestDiv3DSpecific) { div_vec_c_h[div_vec_c_at(i, k, 0)] = static_cast<TypeParam>(0.0); } } - // Copy the initialized data back to the device memory (or unified memory, which in some - // cases may be a no-op if already accessible on the host). + // Copy the initialized data back to the device memory (or unified memory, + // which in some cases may be a no-op if already accessible on the host). Kokkos::deep_copy(this->vec_e, vec_e_h); Kokkos::deep_copy(this->cell_edge_idx, cell_edge_idx_h); Kokkos::deep_copy(this->cell_edge_blk, cell_edge_blk_h); @@ -144,12 +142,18 @@ TYPED_TEST(HorizontalDivTest, TestDiv3DSpecific) { Kokkos::deep_copy(div_vec_c_h, this->div_vec_c); - EXPECT_NEAR(div_vec_c_h(div_vec_c_at(0, 0, 0)), static_cast<TypeParam>(1.7), 1e-6); - EXPECT_NEAR(div_vec_c_h(div_vec_c_at(0, 1, 0)), static_cast<TypeParam>(3.4), 1e-6); - EXPECT_NEAR(div_vec_c_h(div_vec_c_at(1, 0, 0)), static_cast<TypeParam>(2.1), 1e-6); - EXPECT_NEAR(div_vec_c_h(div_vec_c_at(1, 1, 0)), static_cast<TypeParam>(4.2), 1e-6); - EXPECT_NEAR(div_vec_c_h(div_vec_c_at(2, 0, 0)), static_cast<TypeParam>(2.2), 1e-6); - EXPECT_NEAR(div_vec_c_h(div_vec_c_at(2, 1, 0)), static_cast<TypeParam>(4.4), 1e-6); + EXPECT_NEAR(div_vec_c_h(div_vec_c_at(0, 0, 0)), static_cast<TypeParam>(1.7), + 1e-6); + EXPECT_NEAR(div_vec_c_h(div_vec_c_at(0, 1, 0)), static_cast<TypeParam>(3.4), + 1e-6); + EXPECT_NEAR(div_vec_c_h(div_vec_c_at(1, 0, 0)), static_cast<TypeParam>(2.1), + 1e-6); + EXPECT_NEAR(div_vec_c_h(div_vec_c_at(1, 1, 0)), static_cast<TypeParam>(4.2), + 1e-6); + EXPECT_NEAR(div_vec_c_h(div_vec_c_at(2, 0, 0)), static_cast<TypeParam>(2.2), + 1e-6); + EXPECT_NEAR(div_vec_c_h(div_vec_c_at(2, 1, 0)), static_cast<TypeParam>(4.4), + 1e-6); } TYPED_TEST(HorizontalDivTest, TestDiv3DRandom) { @@ -163,7 +167,7 @@ TYPED_TEST(HorizontalDivTest, TestDiv3DRandom) { const auto 
&geofac_div_at = at<nproma, 3, nblks_c>; const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; - //create mirror views to store data on the host + // create mirror views to store data on the host auto vec_e_h = Kokkos::create_mirror_view(this->vec_e); auto cell_edge_idx_h = Kokkos::create_mirror_view(this->cell_edge_idx); auto cell_edge_blk_h = Kokkos::create_mirror_view(this->cell_edge_blk); @@ -201,12 +205,12 @@ TYPED_TEST(HorizontalDivTest, TestDiv3DRandom) { Kokkos::deep_copy(this->geofac_div, geofac_div_h); Kokkos::deep_copy(this->div_vec_c, div_vec_c_h); - div3d<TypeParam>( - this->vec_e.data(), this->cell_edge_idx.data(), this->cell_edge_blk.data(), - this->geofac_div.data(), this->div_vec_c.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, this->nblks_c, - this->nblks_e); + div3d<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); Kokkos::deep_copy(div_vec_c_h, this->div_vec_c); @@ -221,17 +225,14 @@ TYPED_TEST(HorizontalDivTest, TestDiv3DRandom) { for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - vec_e_h[vec_e_at( - cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, - cell_edge_blk_h[cell_edge_at(jc, jb, 0)])] * + vec_e_h[vec_e_at(cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 0)])] * geofac_div_h[geofac_div_at(jc, 0, jb)] + - vec_e_h[vec_e_at( - cell_edge_idx_h[cell_edge_at(jc, jb, 1)], jk, - cell_edge_blk_h[cell_edge_at(jc, jb, 1)])] * + vec_e_h[vec_e_at(cell_edge_idx_h[cell_edge_at(jc, jb, 1)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 1)])] * 
geofac_div_h[geofac_div_at(jc, 1, jb)] + - vec_e_h[vec_e_at( - cell_edge_idx_h[cell_edge_at(jc, jb, 2)], jk, - cell_edge_blk_h[cell_edge_at(jc, jb, 2)])] * + vec_e_h[vec_e_at(cell_edge_idx_h[cell_edge_at(jc, jb, 2)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 2)])] * geofac_div_h[geofac_div_at(jc, 2, jb)]; } } @@ -273,8 +274,10 @@ TYPED_TEST(HorizontalDivTest, TestDiv3D2FSpecific) { // Initialization with specific values for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - vec_e_h[vec_e_at(i, k, 0)] = static_cast<TypeParam>((i + 1) * (k + 1)); // Simple pattern - f4din_h[f4d_at(i, k, 0, 0)] = static_cast<TypeParam>((i + 1) * (k + 2)); // Different pattern for second field + vec_e_h[vec_e_at(i, k, 0)] = + static_cast<TypeParam>((i + 1) * (k + 1)); // Simple pattern + f4din_h[f4d_at(i, k, 0, 0)] = static_cast<TypeParam>( + (i + 1) * (k + 2)); // Different pattern for second field } // Set edge indices to point to specific cells (including self) @@ -322,20 +325,32 @@ TYPED_TEST(HorizontalDivTest, TestDiv3D2FSpecific) { Kokkos::deep_copy(f4dout_h, this->f4dout); // Check first field (same as in div3d test) - EXPECT_NEAR(div_vec_c_h[div_vec_c_at(0, 0, 0)], static_cast<TypeParam>(1.7), 1e-6); - EXPECT_NEAR(div_vec_c_h[div_vec_c_at(0, 1, 0)], static_cast<TypeParam>(3.4), 1e-6); - EXPECT_NEAR(div_vec_c_h[div_vec_c_at(1, 0, 0)], static_cast<TypeParam>(2.1), 1e-6); - EXPECT_NEAR(div_vec_c_h[div_vec_c_at(1, 1, 0)], static_cast<TypeParam>(4.2), 1e-6); - EXPECT_NEAR(div_vec_c_h[div_vec_c_at(2, 0, 0)], static_cast<TypeParam>(2.2), 1e-6); - EXPECT_NEAR(div_vec_c_h[div_vec_c_at(2, 1, 0)], static_cast<TypeParam>(4.4), 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(0, 0, 0)], static_cast<TypeParam>(1.7), + 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(0, 1, 0)], static_cast<TypeParam>(3.4), + 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(1, 0, 0)], static_cast<TypeParam>(2.1), + 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(1, 1, 0)], static_cast<TypeParam>(4.2), + 
1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(2, 0, 0)], static_cast<TypeParam>(2.2), + 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(2, 1, 0)], static_cast<TypeParam>(4.4), + 1e-6); // Check second field (expected values calculated manually) - EXPECT_NEAR(f4dout_h[f4dout_at(0, 0, 0, 0)], static_cast<TypeParam>(3.4), 1e-6); - EXPECT_NEAR(f4dout_h[f4dout_at(0, 1, 0, 0)], static_cast<TypeParam>(5.1), 1e-6); - EXPECT_NEAR(f4dout_h[f4dout_at(1, 0, 0, 0)], static_cast<TypeParam>(4.2), 1e-6); - EXPECT_NEAR(f4dout_h[f4dout_at(1, 1, 0, 0)], static_cast<TypeParam>(6.3), 1e-6); - EXPECT_NEAR(f4dout_h[f4dout_at(2, 0, 0, 0)], static_cast<TypeParam>(4.4), 1e-6); - EXPECT_NEAR(f4dout_h[f4dout_at(2, 1, 0, 0)], static_cast<TypeParam>(6.6), 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(0, 0, 0, 0)], static_cast<TypeParam>(3.4), + 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(0, 1, 0, 0)], static_cast<TypeParam>(5.1), + 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(1, 0, 0, 0)], static_cast<TypeParam>(4.2), + 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(1, 1, 0, 0)], static_cast<TypeParam>(6.3), + 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(2, 0, 0, 0)], static_cast<TypeParam>(4.4), + 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(2, 1, 0, 0)], static_cast<TypeParam>(6.6), + 1e-6); } TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) { @@ -377,7 +392,8 @@ TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) { // Set random edge indices for (int j = 0; j < 3; ++j) { cell_edge_idx_h[cell_edge_at(i, 0, j)] = int_distrib(gen); - cell_edge_blk_h[cell_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + cell_edge_blk_h[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity } // Random geometric factors @@ -402,12 +418,13 @@ TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) { Kokkos::deep_copy(this->f4dout, f4dout_h); // Call the div3d_2field function - div3d_2field<TypeParam>( - this->vec_e.data(), this->cell_edge_idx.data(), this->cell_edge_blk.data(), - this->geofac_div.data(), this->div_vec_c.data(), 
this->f4din.data(), - this->f4dout.data(), this->i_startblk, this->i_endblk, this->i_startidx_in, - this->i_endidx_in, this->slev[0], this->elev[0], this->nproma, this->lacc, - this->nlev, this->nblks_c, this->nblks_e); + div3d_2field<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->f4din.data(), + this->f4dout.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); // Copy results back to host for verification Kokkos::deep_copy(div_vec_c_h, this->div_vec_c); @@ -426,32 +443,26 @@ TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) { for (int jc = i_startidx; jc < i_endidx; ++jc) { // Calculate reference value for first field ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - vec_e_h[vec_e_at( - cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, - cell_edge_blk_h[cell_edge_at(jc, jb, 0)])] * + vec_e_h[vec_e_at(cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 0)])] * geofac_div_h[geofac_div_at(jc, 0, jb)] + - vec_e_h[vec_e_at( - cell_edge_idx_h[cell_edge_at(jc, jb, 1)], jk, - cell_edge_blk_h[cell_edge_at(jc, jb, 1)])] * + vec_e_h[vec_e_at(cell_edge_idx_h[cell_edge_at(jc, jb, 1)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 1)])] * geofac_div_h[geofac_div_at(jc, 1, jb)] + - vec_e_h[vec_e_at( - cell_edge_idx_h[cell_edge_at(jc, jb, 2)], jk, - cell_edge_blk_h[cell_edge_at(jc, jb, 2)])] * + vec_e_h[vec_e_at(cell_edge_idx_h[cell_edge_at(jc, jb, 2)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 2)])] * geofac_div_h[geofac_div_at(jc, 2, jb)]; // Calculate reference value for second field ref_f4dout[f4dout_at(jc, jk, jb, 0)] = - f4din_h[f4d_at( - cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, - cell_edge_blk_h[cell_edge_at(jc, jb, 0)], 0)] * + f4din_h[f4d_at(cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, + 
cell_edge_blk_h[cell_edge_at(jc, jb, 0)], 0)] * geofac_div_h[geofac_div_at(jc, 0, jb)] + - f4din_h[f4d_at( - cell_edge_idx_h[cell_edge_at(jc, jb, 1)], jk, - cell_edge_blk_h[cell_edge_at(jc, jb, 1)], 0)] * + f4din_h[f4d_at(cell_edge_idx_h[cell_edge_at(jc, jb, 1)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 1)], 0)] * geofac_div_h[geofac_div_at(jc, 1, jb)] + - f4din_h[f4d_at( - cell_edge_idx_h[cell_edge_at(jc, jb, 2)], jk, - cell_edge_blk_h[cell_edge_at(jc, jb, 2)], 0)] * + f4din_h[f4d_at(cell_edge_idx_h[cell_edge_at(jc, jb, 2)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 2)], 0)] * geofac_div_h[geofac_div_at(jc, 2, jb)]; } } @@ -500,7 +511,8 @@ TYPED_TEST(HorizontalDivTest, TestDiv4DSpecific) { for (int j = 0; j < 3; ++j) { cell_edge_idx_h[cell_edge_at(i, 0, j)] = (i + j) % nproma; cell_edge_blk_h[cell_edge_at(i, 0, j)] = 0; - geofac_div_h[geofac_div_at(i, j, 0)] = static_cast<TypeParam>(0.1 * (j + 1)); + geofac_div_h[geofac_div_at(i, j, 0)] = + static_cast<TypeParam>(0.1 * (j + 1)); } for (int k = 0; k < nlev; ++k) { @@ -519,28 +531,40 @@ TYPED_TEST(HorizontalDivTest, TestDiv4DSpecific) { Kokkos::deep_copy(this->f4dout, f4dout_h); // Test function - div4d<TypeParam>( - this->cell_edge_idx.data(), this->cell_edge_blk.data(), - this->geofac_div.data(), this->f4din.data(), this->f4dout.data(), - this->dim4d, this->i_startblk, this->i_endblk, this->i_startidx_in, - this->i_endidx_in, this->slev.data(), this->elev.data(), this->nproma, - this->lacc, this->nlev, this->nblks_c, this->nblks_e); + div4d<TypeParam>(this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->geofac_div.data(), this->f4din.data(), + this->f4dout.data(), this->dim4d, this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev.data(), this->elev.data(), this->nproma, + this->lacc, this->nlev, this->nblks_c, this->nblks_e); // Copy results back to host for verification Kokkos::deep_copy(f4dout_h, this->f4dout); - EXPECT_NEAR(f4dout_h[f4dout_at(0, 0, 0, 0)], 
static_cast<TypeParam>(1.4), 1e-6); - EXPECT_NEAR(f4dout_h[f4dout_at(1, 0, 0, 0)], static_cast<TypeParam>(1.1), 1e-6); - EXPECT_NEAR(f4dout_h[f4dout_at(2, 0, 0, 0)], static_cast<TypeParam>(1.1), 1e-6); - EXPECT_NEAR(f4dout_h[f4dout_at(0, 1, 0, 0)], static_cast<TypeParam>(2.0), 1e-6); - EXPECT_NEAR(f4dout_h[f4dout_at(1, 1, 0, 0)], static_cast<TypeParam>(1.7), 1e-6); - EXPECT_NEAR(f4dout_h[f4dout_at(2, 1, 0, 0)], static_cast<TypeParam>(1.7), 1e-6); - EXPECT_NEAR(f4dout_h[f4dout_at(0, 0, 0, 1)], static_cast<TypeParam>(2.0), 1e-6); - EXPECT_NEAR(f4dout_h[f4dout_at(1, 0, 0, 1)], static_cast<TypeParam>(1.7), 1e-6); - EXPECT_NEAR(f4dout_h[f4dout_at(2, 0, 0, 1)], static_cast<TypeParam>(1.7), 1e-6); - EXPECT_NEAR(f4dout_h[f4dout_at(0, 1, 0, 1)], static_cast<TypeParam>(2.6), 1e-6); - EXPECT_NEAR(f4dout_h[f4dout_at(1, 1, 0, 1)], static_cast<TypeParam>(2.3), 1e-6); - EXPECT_NEAR(f4dout_h[f4dout_at(2, 1, 0, 1)], static_cast<TypeParam>(2.3), 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(0, 0, 0, 0)], static_cast<TypeParam>(1.4), + 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(1, 0, 0, 0)], static_cast<TypeParam>(1.1), + 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(2, 0, 0, 0)], static_cast<TypeParam>(1.1), + 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(0, 1, 0, 0)], static_cast<TypeParam>(2.0), + 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(1, 1, 0, 0)], static_cast<TypeParam>(1.7), + 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(2, 1, 0, 0)], static_cast<TypeParam>(1.7), + 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(0, 0, 0, 1)], static_cast<TypeParam>(2.0), + 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(1, 0, 0, 1)], static_cast<TypeParam>(1.7), + 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(2, 0, 0, 1)], static_cast<TypeParam>(1.7), + 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(0, 1, 0, 1)], static_cast<TypeParam>(2.6), + 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(1, 1, 0, 1)], static_cast<TypeParam>(2.3), + 1e-6); + EXPECT_NEAR(f4dout_h[f4dout_at(2, 1, 0, 1)], static_cast<TypeParam>(2.3), + 1e-6); } 
TYPED_TEST(HorizontalDivTest, TestDiv4DRandom) { @@ -591,12 +615,12 @@ TYPED_TEST(HorizontalDivTest, TestDiv4DRandom) { Kokkos::deep_copy(this->f4dout, f4dout_h); // Test function - div4d<TypeParam>( - this->cell_edge_idx.data(), this->cell_edge_blk.data(), - this->geofac_div.data(), this->f4din.data(), this->f4dout.data(), - this->dim4d, this->i_startblk, this->i_endblk, this->i_startidx_in, - this->i_endidx_in, this->slev.data(), this->elev.data(), this->nproma, - this->lacc, this->nlev, this->nblks_c, this->nblks_e); + div4d<TypeParam>(this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->geofac_div.data(), this->f4din.data(), + this->f4dout.data(), this->dim4d, this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev.data(), this->elev.data(), this->nproma, + this->lacc, this->nlev, this->nblks_c, this->nblks_e); // Copy results back to host for verification Kokkos::deep_copy(f4dout_h, this->f4dout); @@ -612,11 +636,10 @@ TYPED_TEST(HorizontalDivTest, TestDiv4DRandom) { for (int jc = i_startidx; jc < i_endidx; ++jc) { TypeParam expected = 0.0; for (int je = 0; je < 3; ++je) { - expected += - f4din_h[f4din_at( - cell_edge_idx_h[cell_edge_at(jc, jb, je)], jk, - cell_edge_blk_h[cell_edge_at(jc, jb, je)], ji)] * - geofac_div_h[geofac_div_at(jc, je, jb)]; + expected += f4din_h[f4din_at( + cell_edge_idx_h[cell_edge_at(jc, jb, je)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, je)], ji)] * + geofac_div_h[geofac_div_at(jc, je, jb)]; } EXPECT_NEAR(f4dout_h[f4dout_at(jc, jk, jb, ji)], expected, 1e-5) @@ -656,8 +679,10 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgSpecific) { // Create mirror views to store data on the host auto vec_e_h = Kokkos::create_mirror_view(this->vec_e); - auto cell_neighbor_idx_h = Kokkos::create_mirror_view(this->cell_neighbor_idx); - auto cell_neighbor_blk_h = Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto cell_neighbor_idx_h = + Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto 
cell_neighbor_blk_h = + Kokkos::create_mirror_view(this->cell_neighbor_blk); auto cell_edge_idx_h = Kokkos::create_mirror_view(this->cell_edge_idx); auto cell_edge_blk_h = Kokkos::create_mirror_view(this->cell_edge_blk); auto geofac_div_h = Kokkos::create_mirror_view(this->geofac_div); @@ -669,8 +694,10 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgSpecific) { // Initialize the vectors with specific values for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - vec_e_h[vec_e_at(i, k, 0)] = static_cast<TypeParam>((i + 1) * (k + 1)); // Simple pattern - opt_in2_h[vec_e_at(i, k, 0)] = static_cast<TypeParam>((i + 1) * (k + 1) * 0.5); // Half of vec_e + vec_e_h[vec_e_at(i, k, 0)] = + static_cast<TypeParam>((i + 1) * (k + 1)); // Simple pattern + opt_in2_h[vec_e_at(i, k, 0)] = + static_cast<TypeParam>((i + 1) * (k + 1) * 0.5); // Half of vec_e } // Set edge indices to point to specific cells @@ -696,9 +723,12 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgSpecific) { // Average coefficients avg_coeff_h[avg_coeff_at(i, 0, 0)] = static_cast<TypeParam>(0.4); // Self - avg_coeff_h[avg_coeff_at(i, 1, 0)] = static_cast<TypeParam>(0.2); // First neighbor - avg_coeff_h[avg_coeff_at(i, 2, 0)] = static_cast<TypeParam>(0.2); // Second neighbor - avg_coeff_h[avg_coeff_at(i, 3, 0)] = static_cast<TypeParam>(0.2); // Third neighbor + avg_coeff_h[avg_coeff_at(i, 1, 0)] = + static_cast<TypeParam>(0.2); // First neighbor + avg_coeff_h[avg_coeff_at(i, 2, 0)] = + static_cast<TypeParam>(0.2); // Second neighbor + avg_coeff_h[avg_coeff_at(i, 3, 0)] = + static_cast<TypeParam>(0.2); // Third neighbor // Initialize div_vec_c and opt_out2 to zero for (int k = 0; k < nlev; ++k) { @@ -735,20 +765,32 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgSpecific) { Kokkos::deep_copy(opt_out2_h, this->opt_out2); // Verify first field results - EXPECT_NEAR(div_vec_c_h[div_vec_c_at(0, 0, 0)], static_cast<TypeParam>(1.88), 1e-6); - EXPECT_NEAR(div_vec_c_h[div_vec_c_at(0, 1, 0)], 
static_cast<TypeParam>(3.76), 1e-6); - EXPECT_NEAR(div_vec_c_h[div_vec_c_at(1, 0, 0)], static_cast<TypeParam>(2.04), 1e-6); - EXPECT_NEAR(div_vec_c_h[div_vec_c_at(1, 1, 0)], static_cast<TypeParam>(4.08), 1e-6); - EXPECT_NEAR(div_vec_c_h[div_vec_c_at(2, 0, 0)], static_cast<TypeParam>(2.08), 1e-6); - EXPECT_NEAR(div_vec_c_h[div_vec_c_at(2, 1, 0)], static_cast<TypeParam>(4.16), 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(0, 0, 0)], static_cast<TypeParam>(1.88), + 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(0, 1, 0)], static_cast<TypeParam>(3.76), + 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(1, 0, 0)], static_cast<TypeParam>(2.04), + 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(1, 1, 0)], static_cast<TypeParam>(4.08), + 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(2, 0, 0)], static_cast<TypeParam>(2.08), + 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(2, 1, 0)], static_cast<TypeParam>(4.16), + 1e-6); // Verify second field results - EXPECT_NEAR(opt_out2_h[div_vec_c_at(0, 0, 0)], static_cast<TypeParam>(0.94), 1e-6); - EXPECT_NEAR(opt_out2_h[div_vec_c_at(0, 1, 0)], static_cast<TypeParam>(1.88), 1e-6); - EXPECT_NEAR(opt_out2_h[div_vec_c_at(1, 0, 0)], static_cast<TypeParam>(1.02), 1e-6); - EXPECT_NEAR(opt_out2_h[div_vec_c_at(1, 1, 0)], static_cast<TypeParam>(2.04), 1e-6); - EXPECT_NEAR(opt_out2_h[div_vec_c_at(2, 0, 0)], static_cast<TypeParam>(1.04), 1e-6); - EXPECT_NEAR(opt_out2_h[div_vec_c_at(2, 1, 0)], static_cast<TypeParam>(2.08), 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(0, 0, 0)], static_cast<TypeParam>(0.94), + 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(0, 1, 0)], static_cast<TypeParam>(1.88), + 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(1, 0, 0)], static_cast<TypeParam>(1.02), + 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(1, 1, 0)], static_cast<TypeParam>(2.04), + 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(2, 0, 0)], static_cast<TypeParam>(1.04), + 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(2, 1, 0)], static_cast<TypeParam>(2.08), + 1e-6); } 
TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { @@ -778,8 +820,10 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { // Create mirror views to store data on the host auto vec_e_h = Kokkos::create_mirror_view(this->vec_e); - auto cell_neighbor_idx_h = Kokkos::create_mirror_view(this->cell_neighbor_idx); - auto cell_neighbor_blk_h = Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto cell_neighbor_idx_h = + Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto cell_neighbor_blk_h = + Kokkos::create_mirror_view(this->cell_neighbor_blk); auto cell_edge_idx_h = Kokkos::create_mirror_view(this->cell_edge_idx); auto cell_edge_blk_h = Kokkos::create_mirror_view(this->cell_edge_blk); auto geofac_div_h = Kokkos::create_mirror_view(this->geofac_div); @@ -804,10 +848,12 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { // Set random edge indices for (int j = 0; j < 3; ++j) { cell_edge_idx_h[cell_edge_at(i, 0, j)] = int_distrib(gen); - cell_edge_blk_h[cell_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + cell_edge_blk_h[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity cell_neighbor_idx_h[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - cell_neighbor_blk_h[cell_neighbor_at(i, 0, j)] = 0; // Keep in same block for simplicity + cell_neighbor_blk_h[cell_neighbor_at(i, 0, j)] = + 0; // Keep in same block for simplicity } // Random geometric factors @@ -869,31 +915,25 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { aux_c[div_vec_c_at(jc, jk, jb)] = - vec_e_h[vec_e_at( - cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, - cell_edge_blk_h[cell_edge_at(jc, jb, 0)])] * + vec_e_h[vec_e_at(cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 0)])] * geofac_div_h[geofac_div_at(jc, 0, jb)] + - vec_e_h[vec_e_at( - cell_edge_idx_h[cell_edge_at(jc, jb, 1)], jk, - cell_edge_blk_h[cell_edge_at(jc, jb, 1)])] * 
+ vec_e_h[vec_e_at(cell_edge_idx_h[cell_edge_at(jc, jb, 1)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 1)])] * geofac_div_h[geofac_div_at(jc, 1, jb)] + - vec_e_h[vec_e_at( - cell_edge_idx_h[cell_edge_at(jc, jb, 2)], jk, - cell_edge_blk_h[cell_edge_at(jc, jb, 2)])] * + vec_e_h[vec_e_at(cell_edge_idx_h[cell_edge_at(jc, jb, 2)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 2)])] * geofac_div_h[geofac_div_at(jc, 2, jb)]; aux_c2[div_vec_c_at(jc, jk, jb)] = - opt_in2_h[vec_e_at( - cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, - cell_edge_blk_h[cell_edge_at(jc, jb, 0)])] * + opt_in2_h[vec_e_at(cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 0)])] * geofac_div_h[geofac_div_at(jc, 0, jb)] + - opt_in2_h[vec_e_at( - cell_edge_idx_h[cell_edge_at(jc, jb, 1)], jk, - cell_edge_blk_h[cell_edge_at(jc, jb, 1)])] * + opt_in2_h[vec_e_at(cell_edge_idx_h[cell_edge_at(jc, jb, 1)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 1)])] * geofac_div_h[geofac_div_at(jc, 1, jb)] + - opt_in2_h[vec_e_at( - cell_edge_idx_h[cell_edge_at(jc, jb, 2)], jk, - cell_edge_blk_h[cell_edge_at(jc, jb, 2)])] * + opt_in2_h[vec_e_at(cell_edge_idx_h[cell_edge_at(jc, jb, 2)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 2)])] * geofac_div_h[geofac_div_at(jc, 2, jb)]; } } @@ -1000,8 +1040,10 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgSpecificNoL2fields) { // Create mirror views to store data on the host auto vec_e_h = Kokkos::create_mirror_view(this->vec_e); - auto cell_neighbor_idx_h = Kokkos::create_mirror_view(this->cell_neighbor_idx); - auto cell_neighbor_blk_h = Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto cell_neighbor_idx_h = + Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto cell_neighbor_blk_h = + Kokkos::create_mirror_view(this->cell_neighbor_blk); auto cell_edge_idx_h = Kokkos::create_mirror_view(this->cell_edge_idx); auto cell_edge_blk_h = Kokkos::create_mirror_view(this->cell_edge_blk); auto geofac_div_h = 
Kokkos::create_mirror_view(this->geofac_div); @@ -1013,8 +1055,10 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgSpecificNoL2fields) { // Initialize the vectors with specific values for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { - vec_e_h[vec_e_at(i, k, 0)] = static_cast<TypeParam>((i + 1) * (k + 1)); // Simple pattern - opt_in2_h[vec_e_at(i, k, 0)] = static_cast<TypeParam>((i + 1) * (k + 1) * 0.5); // Half of vec_e + vec_e_h[vec_e_at(i, k, 0)] = + static_cast<TypeParam>((i + 1) * (k + 1)); // Simple pattern + opt_in2_h[vec_e_at(i, k, 0)] = + static_cast<TypeParam>((i + 1) * (k + 1) * 0.5); // Half of vec_e } // Set edge indices to point to specific cells @@ -1040,9 +1084,12 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgSpecificNoL2fields) { // Average coefficients avg_coeff_h[avg_coeff_at(i, 0, 0)] = static_cast<TypeParam>(0.4); // Self - avg_coeff_h[avg_coeff_at(i, 1, 0)] = static_cast<TypeParam>(0.2); // First neighbor - avg_coeff_h[avg_coeff_at(i, 2, 0)] = static_cast<TypeParam>(0.2); // Second neighbor - avg_coeff_h[avg_coeff_at(i, 3, 0)] = static_cast<TypeParam>(0.2); // Third neighbor + avg_coeff_h[avg_coeff_at(i, 1, 0)] = + static_cast<TypeParam>(0.2); // First neighbor + avg_coeff_h[avg_coeff_at(i, 2, 0)] = + static_cast<TypeParam>(0.2); // Second neighbor + avg_coeff_h[avg_coeff_at(i, 3, 0)] = + static_cast<TypeParam>(0.2); // Third neighbor // Initialize div_vec_c and opt_out2 to zero for (int k = 0; k < nlev; ++k) { @@ -1079,20 +1126,32 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgSpecificNoL2fields) { Kokkos::deep_copy(opt_out2_h, this->opt_out2); // Verify first field results - EXPECT_NEAR(div_vec_c_h[div_vec_c_at(0, 0, 0)], static_cast<TypeParam>(1.88), 1e-6); - EXPECT_NEAR(div_vec_c_h[div_vec_c_at(0, 1, 0)], static_cast<TypeParam>(3.76), 1e-6); - EXPECT_NEAR(div_vec_c_h[div_vec_c_at(1, 0, 0)], static_cast<TypeParam>(2.04), 1e-6); - EXPECT_NEAR(div_vec_c_h[div_vec_c_at(1, 1, 0)], static_cast<TypeParam>(4.08), 1e-6); - 
EXPECT_NEAR(div_vec_c_h[div_vec_c_at(2, 0, 0)], static_cast<TypeParam>(2.08), 1e-6); - EXPECT_NEAR(div_vec_c_h[div_vec_c_at(2, 1, 0)], static_cast<TypeParam>(4.16), 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(0, 0, 0)], static_cast<TypeParam>(1.88), + 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(0, 1, 0)], static_cast<TypeParam>(3.76), + 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(1, 0, 0)], static_cast<TypeParam>(2.04), + 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(1, 1, 0)], static_cast<TypeParam>(4.08), + 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(2, 0, 0)], static_cast<TypeParam>(2.08), + 1e-6); + EXPECT_NEAR(div_vec_c_h[div_vec_c_at(2, 1, 0)], static_cast<TypeParam>(4.16), + 1e-6); // Since l2fields=false, opt_out2 should not be modified - EXPECT_NEAR(opt_out2_h[div_vec_c_at(0, 0, 0)], static_cast<TypeParam>(0.0), 1e-6); - EXPECT_NEAR(opt_out2_h[div_vec_c_at(0, 1, 0)], static_cast<TypeParam>(0.0), 1e-6); - EXPECT_NEAR(opt_out2_h[div_vec_c_at(1, 0, 0)], static_cast<TypeParam>(0.0), 1e-6); - EXPECT_NEAR(opt_out2_h[div_vec_c_at(1, 1, 0)], static_cast<TypeParam>(0.0), 1e-6); - EXPECT_NEAR(opt_out2_h[div_vec_c_at(2, 0, 0)], static_cast<TypeParam>(0.0), 1e-6); - EXPECT_NEAR(opt_out2_h[div_vec_c_at(2, 1, 0)], static_cast<TypeParam>(0.0), 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(0, 0, 0)], static_cast<TypeParam>(0.0), + 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(0, 1, 0)], static_cast<TypeParam>(0.0), + 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(1, 0, 0)], static_cast<TypeParam>(0.0), + 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(1, 1, 0)], static_cast<TypeParam>(0.0), + 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(2, 0, 0)], static_cast<TypeParam>(0.0), + 1e-6); + EXPECT_NEAR(opt_out2_h[div_vec_c_at(2, 1, 0)], static_cast<TypeParam>(0.0), + 1e-6); } TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { @@ -1122,8 +1181,10 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { // Create mirror views to store data on the host auto 
vec_e_h = Kokkos::create_mirror_view(this->vec_e); - auto cell_neighbor_idx_h = Kokkos::create_mirror_view(this->cell_neighbor_idx); - auto cell_neighbor_blk_h = Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto cell_neighbor_idx_h = + Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto cell_neighbor_blk_h = + Kokkos::create_mirror_view(this->cell_neighbor_blk); auto cell_edge_idx_h = Kokkos::create_mirror_view(this->cell_edge_idx); auto cell_edge_blk_h = Kokkos::create_mirror_view(this->cell_edge_blk); auto geofac_div_h = Kokkos::create_mirror_view(this->geofac_div); @@ -1142,16 +1203,19 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { vec_e_h[vec_e_at(i, k, 0)] = real_distrib(gen); - opt_in2_h[vec_e_at(i, k, 0)] = real_distrib(gen); // Not used but initialize anyway + opt_in2_h[vec_e_at(i, k, 0)] = + real_distrib(gen); // Not used but initialize anyway } // Set random edge indices for (int j = 0; j < 3; ++j) { cell_edge_idx_h[cell_edge_at(i, 0, j)] = int_distrib(gen); - cell_edge_blk_h[cell_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + cell_edge_blk_h[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity cell_neighbor_idx_h[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - cell_neighbor_blk_h[cell_neighbor_at(i, 0, j)] = 0; // Keep in same block for simplicity + cell_neighbor_blk_h[cell_neighbor_at(i, 0, j)] = + 0; // Keep in same block for simplicity } // Random geometric factors @@ -1167,7 +1231,8 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { // Random initial values for div_vec_c and opt_out2 for (int k = 0; k < nlev; ++k) { div_vec_c_h[div_vec_c_at(i, k, 0)] = static_cast<TypeParam>(0.0); - opt_out2_h[div_vec_c_at(i, k, 0)] = static_cast<TypeParam>(0.0); // Not used but initialize anyway + opt_out2_h[div_vec_c_at(i, k, 0)] = + static_cast<TypeParam>(0.0); // Not used but initialize anyway } } @@ -1211,17 +1276,14 @@ 
TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { aux_c[div_vec_c_at(jc, jk, jb)] = - vec_e_h[vec_e_at( - cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, - cell_edge_blk_h[cell_edge_at(jc, jb, 0)])] * + vec_e_h[vec_e_at(cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 0)])] * geofac_div_h[geofac_div_at(jc, 0, jb)] + - vec_e_h[vec_e_at( - cell_edge_idx_h[cell_edge_at(jc, jb, 1)], jk, - cell_edge_blk_h[cell_edge_at(jc, jb, 1)])] * + vec_e_h[vec_e_at(cell_edge_idx_h[cell_edge_at(jc, jb, 1)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 1)])] * geofac_div_h[geofac_div_at(jc, 1, jb)] + - vec_e_h[vec_e_at( - cell_edge_idx_h[cell_edge_at(jc, jb, 2)], jk, - cell_edge_blk_h[cell_edge_at(jc, jb, 2)])] * + vec_e_h[vec_e_at(cell_edge_idx_h[cell_edge_at(jc, jb, 2)], jk, + cell_edge_blk_h[cell_edge_at(jc, jb, 2)])] * geofac_div_h[geofac_div_at(jc, 2, jb)]; } } -- GitLab From 23d1ce19c7ea06e0c40513dbfbb161b7acf78f3f Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Thu, 27 Mar 2025 12:13:53 +0100 Subject: [PATCH 09/34] fixed few warnings --- test/c/test_horizontal_recon.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/c/test_horizontal_recon.cpp b/test/c/test_horizontal_recon.cpp index 089c58f..57b77e6 100644 --- a/test/c/test_horizontal_recon.cpp +++ b/test/c/test_horizontal_recon.cpp @@ -41,6 +41,8 @@ protected: return std::make_tuple(9, 5); case ReconstructionMethod::cubic: return std::make_tuple(9, 9); + default: + return std::make_tuple(0, 0); // or throw/assert if appropriate } } @@ -930,9 +932,6 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto 
&rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; - const auto &rmat_utri_at = - at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; // Create host mirror views -- GitLab From 58c4b6602edc5fa06c5574520a3128f622dce4d0 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 4 Apr 2025 15:22:48 +0200 Subject: [PATCH 10/34] removed unused calls of kokkos::view --- src/horizontal/mo_lib_divrot.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/horizontal/mo_lib_divrot.cpp b/src/horizontal/mo_lib_divrot.cpp index d460211..f21da12 100644 --- a/src/horizontal/mo_lib_divrot.cpp +++ b/src/horizontal/mo_lib_divrot.cpp @@ -200,9 +200,6 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - // Kokkos::View<T ***> z_d("z_d", lsq_dim_c, nproma, nlev); - // Kokkos::View<T *> z_qt_times_d("z_qt_times_d", lsq_dim_unk); - UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); @@ -366,8 +363,6 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - Kokkos::View<T ***> z_b("z_b", lsq_dim_c, nproma, elev); - UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); @@ -507,9 +502,6 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - // Kokkos::View<T ***> z_d("z_d", lsq_dim_c, nproma, elev); - // Kokkos::View<T *> z_qt_times_d("z_qt_times_d", 9); - UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); @@ -753,8 +745,6 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - // 
Kokkos::View<T *> z_b("z_b", 9); - UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); -- GitLab From 62fb9fffc6a9f9f5670e45d4509efa28cac853db Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 4 Apr 2025 15:36:07 +0200 Subject: [PATCH 11/34] corrected the use of nproma in mo_lib_loopindices --- src/support/mo_lib_loopindices.cpp | 59 +++++++++++++++++++----------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/src/support/mo_lib_loopindices.cpp b/src/support/mo_lib_loopindices.cpp index 30c82bd..8c8d318 100644 --- a/src/support/mo_lib_loopindices.cpp +++ b/src/support/mo_lib_loopindices.cpp @@ -12,21 +12,26 @@ #include <algorithm> // For std::max // get_indices_c_lib function -void get_indices_c_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, +void get_indices_c_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, const int i_blk, const int i_startblk, const int i_endblk, int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true) { - + //Since code is ported incrementally from Fortran to C++, depending on where the function is called from //(either fortran or c++), the first index should be either 0 or 1. 
int first_index; - if (called_from_cpp) + int nproma_loc; + if (called_from_cpp){ first_index = 0; - else - first_index = 1; - + nproma_loc = nproma - 1; + } + else { + first_index = 1; + nproma_loc = nproma; + } + if (i_blk == i_startblk) { i_startidx_out = std::max(first_index, i_startidx_in); - i_endidx_out = nproma; + i_endidx_out = nproma_loc; if (i_blk == i_endblk) { i_endidx_out = i_endidx_in; } @@ -35,43 +40,53 @@ void get_indices_c_lib(const int i_startidx_in, const int i_endidx_in, const int i_endidx_out = i_endidx_in; } else { i_startidx_out = first_index; - i_endidx_out = nproma; + i_endidx_out = nproma_loc; } } // get_indices_e_lib function -void get_indices_e_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, +void get_indices_e_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, const int i_blk, const int i_startblk, const int i_endblk, int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true) { - - //Since code is ported incrementally from Fortran to C++, depending on where the function is called from, + + //Since code is ported incrementally from Fortran to C++, depending on where the function is called from, //the first index should be either 0 or 1. int first_index; - if (called_from_cpp) + int nproma_loc; + if (called_from_cpp) { first_index = 0; - else + nproma_loc = nproma - 1; + } + else { first_index = 1; + nproma_loc = nproma; + } i_startidx_out = (i_blk != i_startblk) ? first_index : std::max(first_index, i_startidx_in); - i_endidx_out = (i_blk != i_endblk) ? nproma : i_endidx_in; + i_endidx_out = (i_blk != i_endblk) ? 
nproma_loc : i_endidx_in; } // get_indices_v_lib function -void get_indices_v_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, +void get_indices_v_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, const int i_blk, const int i_startblk, const int i_endblk, int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true) { - - //Since code is ported incrementally from Fortran to C++, depending on where the function is called from, + + //Since code is ported incrementally from Fortran to C++, depending on where the function is called from, //the first index should be either 0 or 1. int first_index; - if (called_from_cpp) + int nproma_loc; + if (called_from_cpp) { first_index = 0; - else + nproma_loc = nproma - 1; + } + else { first_index = 1; + nproma_loc = nproma; + } if (i_blk == i_startblk) { i_startidx_out = i_startidx_in; - i_endidx_out = nproma; + i_endidx_out = nproma_loc; if (i_blk == i_endblk) { i_endidx_out = i_endidx_in; } @@ -80,6 +95,6 @@ void get_indices_v_lib(const int i_startidx_in, const int i_endidx_in, const int i_endidx_out = i_endidx_in; } else { i_startidx_out = first_index; - i_endidx_out = nproma; + i_endidx_out = nproma_loc; } -} \ No newline at end of file +} -- GitLab From 0217e3e0c9f8bf2332b96a8981df91cc38d43a6b Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 4 Apr 2025 15:37:04 +0200 Subject: [PATCH 12/34] modified few c++ functions to incorporate the changes made in the last commit --- src/horizontal/mo_lib_divrot.cpp | 74 ++++++++++++++--------------- test/c/test_horizontal_div.cpp | 80 ++++++++++++++++---------------- test/c/test_horizontal_recon.cpp | 43 +++++++++++------ test/c/test_horizontal_rot.cpp | 18 +++---- 4 files changed, 114 insertions(+), 101 deletions(-) diff --git a/src/horizontal/mo_lib_divrot.cpp b/src/horizontal/mo_lib_divrot.cpp index f21da12..dbbef65 100644 --- a/src/horizontal/mo_lib_divrot.cpp +++ b/src/horizontal/mo_lib_divrot.cpp @@ 
-51,13 +51,13 @@ void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, nblks_c); UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); + {elev + 1, i_endidx + 1}); Kokkos::parallel_for( "recon_lsq_cell_l_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { @@ -134,13 +134,13 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, lsq_dim_c, nblks_c); UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); + {elev + 1, i_endidx + 1}); Kokkos::parallel_for( "recon_lsq_cell_l_svd_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { @@ -217,7 +217,7 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, if (patch_id > 0 || l_limited_area) { Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( {0, i_startidx_in, slev, i_startblk}, - {lsq_dim_unk + 1, i_endidx_in, elev, i_endblk}); + {lsq_dim_unk + 1, i_endidx_in + 1, elev + 1, i_endblk + 1}); Kokkos::parallel_for( "recon_lsq_cell_q_init", initPolicy, KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) { @@ -225,13 +225,13 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, }); } - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; 
get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); + {elev + 1, i_endidx + 1}); Kokkos::parallel_for( "recon_lsq_cell_q_step1", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { @@ -384,13 +384,13 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, }); } - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); + {elev + 1, i_endidx + 1}); Kokkos::parallel_for( "recon_lsq_cell_q_svd_step1", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { @@ -527,13 +527,13 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, }); } - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); + {elev + 1, i_endidx + 1}); Kokkos::parallel_for( "recon_lsq_cell_c_step1", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { @@ -756,13 +756,13 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); if (patch_id > 0 || l_limited_area) { - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::MDRangePolicy<Kokkos::Rank<3>> initPolicy( - {slev, i_startidx, 0}, {elev, i_endidx, lsq_dim_unk + 1}); + 
{slev, i_startidx, 0}, {elev + 1, i_endidx + 1, lsq_dim_unk + 1}); Kokkos::parallel_for( "recon_lsq_cell_c_svd_init", initPolicy, KOKKOS_LAMBDA(const int jk, const int jc, const int ji) { @@ -771,13 +771,13 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, } } - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); + {elev + 1, i_endidx + 1}); Kokkos::parallel_for( "recon_lsq_cell_c_svd_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { @@ -932,13 +932,13 @@ void div3d(const T *vec_e, const int *cell_edge_idx, const int *cell_edge_blk, UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c); UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c); - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); + {elev + 1, i_endidx + 1}); Kokkos::parallel_for( "div3d_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { div_vec_c_view(jc, jk, jb) = @@ -980,13 +980,13 @@ void div3d_2field(const T *vec_e, const int *cell_edge_idx, UnmanagedConstT3D in2_view(in2, nproma, nlev, nblks_e); UnmanagedT3D out2_view(out2, nproma, nlev, nblks_c); - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); + {elev + 1, i_endidx + 1}); 
Kokkos::parallel_for( "div3d_2field_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { @@ -1039,14 +1039,14 @@ void div4d(const int *cell_edge_idx, const int *cell_edge_blk, UnmanagedConstT4D f4din_view(f4din, nproma, nlev, nblks_e, dim4d); UnmanagedT4D f4dout_view(f4dout, nproma, nlev, nblks_c, dim4d); - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); for (int ji = 0; ji < dim4d; ++ji) { Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev[ji], i_startidx}, - {elev[ji], i_endidx}); + {elev[ji] + 1, i_endidx + 1}); Kokkos::parallel_for( "div4d_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { @@ -1104,13 +1104,13 @@ void div_avg(const T *vec_e, const int *cell_neighbor_idx, int i_endblk = i_endblk_in[0]; if (l2fields) { - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); + {elev + 1, i_endidx + 1}); Kokkos::parallel_for( "div_avg_step1", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { @@ -1131,13 +1131,13 @@ void div_avg(const T *vec_e, const int *cell_neighbor_idx, }); } } else { - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); + {elev + 1, i_endidx + 1}); Kokkos::parallel_for( "div_avg_step2", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { @@ -1156,13 +1156,13 @@ void div_avg(const T 
*vec_e, const int *cell_neighbor_idx, i_startblk = i_startblk_in[1]; i_endblk = i_endblk_in[1]; - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); + {elev + 1, i_endidx + 1}); Kokkos::parallel_for( "div_avg_step3", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { @@ -1171,13 +1171,13 @@ void div_avg(const T *vec_e, const int *cell_neighbor_idx, } if (l2fields) { - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); + {elev + 1, i_endidx + 1}); Kokkos::parallel_for( "div_avg_step4", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { @@ -1191,13 +1191,13 @@ void div_avg(const T *vec_e, const int *cell_neighbor_idx, i_endblk = i_endblk_in[2]; if (l2fields) { - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); + {elev + 1, i_endidx + 1}); Kokkos::parallel_for( "div_avg_step5", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { @@ -1220,13 +1220,13 @@ void div_avg(const T *vec_e, const int *cell_neighbor_idx, }); } } else { - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, i_startblk, 
i_endblk, i_startidx, i_endidx); Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); + {elev + 1, i_endidx + 1}); Kokkos::parallel_for( "div_avg_step6", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { @@ -1269,13 +1269,13 @@ void rot_vertex_atmos(const T *vec_e, const int *vert_edge_idx, UnmanagedT3D rot_vec_view(rot_vec, nproma, nlev, nblks_v); - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); + {elev + 1, i_endidx + 1}); Kokkos::parallel_for( "rot_vertex_atmos_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jv) { @@ -1322,13 +1322,13 @@ void rot_vertex_ri(const T *vec_e, const int *vert_edge_idx, UnmanagedT3D rot_vec_view(rot_vec, nproma, nlev, nblks_v); - for (int jb = i_startblk; jb < i_endblk; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, - {elev, i_endidx}); + {elev + 1, i_endidx + 1}); Kokkos::parallel_for( "rot_vertex_atmos_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jv) { diff --git a/test/c/test_horizontal_div.cpp b/test/c/test_horizontal_div.cpp index 5904691..cf5f320 100644 --- a/test/c/test_horizontal_div.cpp +++ b/test/c/test_horizontal_div.cpp @@ -29,9 +29,9 @@ protected: static constexpr int dim4d = 2; // 4th dimension size int i_startblk = 0; - int i_endblk = nblks_c; // Test blocks [0 .. nblks_c-1] + int i_endblk = nblks_c - 1; // Test blocks [0 .. nblks_c-1] int i_startidx_in = 0; - int i_endidx_in = nproma; // Full range: 0 .. nproma-1 + int i_endidx_in = nproma - 1; // Full range: 0 .. 
nproma-1 std::vector<int> slev; std::vector<int> elev; bool lacc = false; // Not using ACC-specific behavior. @@ -74,7 +74,7 @@ protected: // We keep slev and elev as std::vector since they are small and used only // on the host. slev.resize(dim4d, 0); - elev.resize(dim4d, nlev); // Full vertical range (0 .. nlev-1) + elev.resize(dim4d, nlev - 1); // Full vertical range (0 .. nlev-1) } }; @@ -177,7 +177,7 @@ TYPED_TEST(HorizontalDivTest, TestDiv3DRandom) { // Initialize the arrays with random values. std::random_device rd; std::mt19937 gen(rd()); - std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + std::uniform_real_distribution<TypeParam> real_distrib(-1.0, 1.0); std::uniform_int_distribution<int> int_distrib(0, nproma - 1); for (int i = 0; i < nproma; ++i) { @@ -217,13 +217,13 @@ TYPED_TEST(HorizontalDivTest, TestDiv3DRandom) { // Calculate reference values separately and verify results std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, this->i_endblk, i_startidx, i_endidx); - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int jk = this->slev[0]; jk <= this->elev[0]; ++jk) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = vec_e_h[vec_e_at(cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, cell_edge_blk_h[cell_edge_at(jc, jb, 0)])] * @@ -380,7 +380,7 @@ TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) { std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + std::uniform_real_distribution<TypeParam> real_distrib(-1.0, 1.0); // Initialization with random values 
for (int i = 0; i < nproma; ++i) { @@ -434,13 +434,13 @@ TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) { std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); std::vector<TypeParam> ref_f4dout(nproma * nlev * nblks_c * dim4d, 0.0); - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, this->i_endblk, i_startidx, i_endidx); - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int jk = this->slev[0]; jk <= this->elev[0]; ++jk) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { // Calculate reference value for first field ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = vec_e_h[vec_e_at(cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, @@ -626,14 +626,14 @@ TYPED_TEST(HorizontalDivTest, TestDiv4DRandom) { Kokkos::deep_copy(f4dout_h, this->f4dout); // Compute reference result and check - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, this->i_endblk, i_startidx, i_endidx); for (int ji = 0; ji < dim4d; ++ji) { - for (int jk = this->slev[ji]; jk < this->elev[ji]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int jk = this->slev[ji]; jk <= this->elev[ji]; ++jk) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { TypeParam expected = 0.0; for (int je = 0; je < 3; ++je) { expected += f4din_h[f4din_at( @@ -668,9 +668,9 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgSpecific) { // Vectors for additional parameters // Vectors for block and index ranges std::vector<int> i_startblk_in(3, 0); - std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_endblk_in(3, nblks_c - 1); std::vector<int> i_startidx_in(3, 0); 
- std::vector<int> i_endidx_in(3, nproma); + std::vector<int> i_endidx_in(3, nproma - 1); // Parameters for the test int patch_id = 1; @@ -806,9 +806,9 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { // Vectors for block and index ranges std::vector<int> i_startblk_in(3, 0); - std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_endblk_in(3, nblks_c - 1); std::vector<int> i_startidx_in(3, 0); - std::vector<int> i_endidx_in(3, nproma); + std::vector<int> i_endidx_in(3, nproma - 1); // Parameters for the test int patch_id = 1; @@ -907,13 +907,13 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { std::vector<TypeParam> ref_opt_out2(dim_combine(nproma, nlev, nblks_c)); // Step 1: Calculate aux_c and aux_c2 - for (int jb = i_startblk_in[0]; jb < i_endblk_in[0]; ++jb) { + for (int jb = i_startblk_in[0]; jb <= i_endblk_in[0]; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx); - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int jk = this->slev[0]; jk <= this->elev[0]; ++jk) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { aux_c[div_vec_c_at(jc, jk, jb)] = vec_e_h[vec_e_at(cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, cell_edge_blk_h[cell_edge_at(jc, jb, 0)])] * @@ -940,13 +940,13 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { } // Step 2: Assign aux_c to div_vec_c and aux_c2 to opt_out2 for patch_id > 0 - for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { + for (int jb = i_startblk_in[1]; jb <= i_endblk_in[1]; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx); - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int jk = this->slev[0]; jk <= this->elev[0]; ++jk) { + for (int jc = i_startidx; jc <= 
i_endidx; ++jc) { ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = aux_c[div_vec_c_at(jc, jk, jb)]; ref_opt_out2[div_vec_c_at(jc, jk, jb)] = @@ -956,13 +956,13 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { } // Step 3: Perform averaging for the rest of the blocks - for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { + for (int jb = i_startblk_in[2]; jb <= i_endblk_in[2]; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx); - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int jk = this->slev[0]; jk <= this->elev[0]; ++jk) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = aux_c[div_vec_c_at(jc, jk, jb)] * avg_coeff_h[avg_coeff_at(jc, 0, jb)] + @@ -1026,9 +1026,9 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgSpecificNoL2fields) { // Vectors for block and index ranges std::vector<int> i_startblk_in(3, 0); - std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_endblk_in(3, nblks_c - 1); std::vector<int> i_startidx_in(3, 0); - std::vector<int> i_endidx_in(3, nproma); + std::vector<int> i_endidx_in(3, nproma - 1); // Parameters for the test int patch_id = 1; @@ -1167,9 +1167,9 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { // Vectors for block and index ranges std::vector<int> i_startblk_in(3, 0); - std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_endblk_in(3, nblks_c - 1); std::vector<int> i_startidx_in(3, 0); - std::vector<int> i_endidx_in(3, nproma); + std::vector<int> i_endidx_in(3, nproma - 1); // Parameters for the test int patch_id = 1; @@ -1268,13 +1268,13 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, nblks_c)); // Step 1: Calculate aux_c (but not aux_c2 since l2fields=false) - for (int jb = i_startblk_in[0]; jb < 
i_endblk_in[0]; ++jb) { + for (int jb = i_startblk_in[0]; jb <= i_endblk_in[0]; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx); - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int jk = this->slev[0]; jk <= this->elev[0]; ++jk) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { aux_c[div_vec_c_at(jc, jk, jb)] = vec_e_h[vec_e_at(cell_edge_idx_h[cell_edge_at(jc, jb, 0)], jk, cell_edge_blk_h[cell_edge_at(jc, jb, 0)])] * @@ -1291,13 +1291,13 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { // Step 2: Assign aux_c to div_vec_c for patch_id > 0 (opt_out2 not updated // since l2fields=false) - for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { + for (int jb = i_startblk_in[1]; jb <= i_endblk_in[1]; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx); - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int jk = this->slev[0]; jk <= this->elev[0]; ++jk) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = aux_c[div_vec_c_at(jc, jk, jb)]; } @@ -1306,13 +1306,13 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { // Step 3: Perform averaging for the rest of the blocks (only for div_vec_c, // not opt_out2) - for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { + for (int jb = i_startblk_in[2]; jb <= i_endblk_in[2]; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx); - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int jk = this->slev[0]; jk <= this->elev[0]; ++jk) { + for (int jc = 
i_startidx; jc <= i_endidx; ++jc) { ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = aux_c[div_vec_c_at(jc, jk, jb)] * avg_coeff_h[avg_coeff_at(jc, 0, jb)] + diff --git a/test/c/test_horizontal_recon.cpp b/test/c/test_horizontal_recon.cpp index 57b77e6..d8ea721 100644 --- a/test/c/test_horizontal_recon.cpp +++ b/test/c/test_horizontal_recon.cpp @@ -48,8 +48,8 @@ protected: // Constant dimensions. static constexpr int nproma = 3; // inner loop length - static constexpr int nlev = 1; // number of vertical levels - static constexpr int nblks_c = 1; // number of cell blocks (for p_e_in) + static constexpr int nlev = 2; // number of vertical levels + static constexpr int nblks_c = 2; // number of cell blocks (for p_e_in) static constexpr std::tuple<int, int> lsq_dim = init_lsq_dim(static_cast<ReconstructionMethod>(ReconMethod)); static constexpr int lsq_dim_c = std::get<0>(lsq_dim); @@ -57,11 +57,11 @@ protected: // Parameter values. int i_startblk = 0; - int i_endblk = nblks_c; // Test blocks [0 .. nblks_c-1] + int i_endblk = nblks_c - 1; // Test blocks [0 .. nblks_c-1] int i_startidx_in = 0; - int i_endidx_in = nproma; // Full range: 0 .. nproma-1 + int i_endidx_in = nproma - 1; // Full range: 0 .. nproma-1 int slev = 0; - int elev = nlev; // Full vertical range (0 .. nlev-1) + int elev = nlev - 1; // Full vertical range (0 .. nlev-1) int patch_id = 0; bool lacc = false; // Not using ACC-specific behavior. bool acc_async = false; // No asynchronous execution. 
@@ -252,7 +252,7 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellRandom) { std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); // Initialization for (int i = 0; i < nproma; ++i) { @@ -304,13 +304,15 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellRandom) { std::vector<TypeParam> z_d(lsq_dim_c); std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); - + + // doing the calculation only for jb=0 for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, this->i_endblk, i_startidx, i_endidx); + // calculation only for jk = 0 for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { for (int i = 0; i < lsq_dim_c; ++i) { z_d[i] = p_cc_h[p_cc_at( cell_neighbor_idx_h[cell_neighbor_at(jc, jb, i)], jk, @@ -453,7 +455,7 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVDRandom) { std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); // Initialization for (int i = 0; i < nproma; ++i) { @@ -497,12 +499,14 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVDRandom) { // Compute reference result std::vector<TypeParam> z_d(lsq_dim_c); std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + // doing the calculation only for jb=0 for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, this->i_endblk, 
i_startidx, i_endidx); + // calculation only for jk = 0 for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { for (int i = 0; i < lsq_dim_c; ++i) { z_d[i] = p_cc_h[p_cc_at( cell_neighbor_idx_h[cell_neighbor_at(jc, jb, i)], jk, @@ -746,6 +750,7 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { Kokkos::View<TypeParam **, host_space> p_result_h("p_result_h", lsq_dim_unk + 1, nproma); + // calculating only for jb=0 for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, @@ -753,8 +758,9 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { // Step 1: Calculate z_d values (matches the "recon_lsq_cell_q_step1" // parallel_for) + // calculating only for jk = 0 for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { for (int i = 0; i < lsq_dim_c; ++i) { z_d_h(i, jc, jk) = p_cc_h[p_cc_at( @@ -767,8 +773,9 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { // Step 2: Calculate coefficients (matches the "recon_lsq_cell_q_step2" // parallel_for) + // calculating only for jk = 0 for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { // Matrix multiplication (Q^T * d) for (int j = 0; j < lsq_dim_unk; ++j) { z_qt_times_d_h(j) = 0.0; @@ -995,12 +1002,14 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { // Compute reference result std::vector<TypeParam> z_d(lsq_dim_c); std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + // calculating only for jb=0 for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, 
this->i_endblk, i_startidx, i_endidx); + // calculating only for jk = 0 for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { for (int i = 0; i < lsq_dim_c; ++i) { z_d[i] = p_cc_h[p_cc_at( cell_neighbor_idx_h[cell_neighbor_at(jc, jb, i)], jk, @@ -1255,12 +1264,14 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellRandom) { std::vector<TypeParam> z_d(lsq_dim_c); std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + // calculating only for jb=0 for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, this->i_endblk, i_startidx, i_endidx); + // calculating only for jk = 0 for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { for (int i = 0; i < lsq_dim_c; ++i) { z_d[i] = p_cc_h[p_cc_at( cell_neighbor_idx_h[cell_neighbor_at(jc, jb, i)], jk, @@ -1494,12 +1505,14 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + // calculating only for jb=0 for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, this->i_endblk, i_startidx, i_endidx); + // calculating only for jk = 0 for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { for (int i = 0; i < lsq_dim_c; ++i) { z_d[i] = p_cc_h[p_cc_at( cell_neighbor_idx_h[cell_neighbor_at(jc, jb, i)], jk, diff --git a/test/c/test_horizontal_rot.cpp b/test/c/test_horizontal_rot.cpp index 2c8fc46..92100e3 100644 --- 
a/test/c/test_horizontal_rot.cpp +++ b/test/c/test_horizontal_rot.cpp @@ -30,9 +30,9 @@ protected: static constexpr int dim4d = 2; // 4th dimension size int i_startblk = 0; - int i_endblk = nblks_v; // Test blocks [0 .. nblks_v-1] + int i_endblk = nblks_v - 1; // Test blocks [0 .. nblks_v-1] int i_startidx_in = 0; - int i_endidx_in = nproma; // Full range: 0 .. nproma-1 + int i_endidx_in = nproma - 1; // Full range: 0 .. nproma-1 std::vector<int> slev; std::vector<int> elev; bool lacc = false; // Not using ACC-specific behavior. @@ -62,7 +62,7 @@ protected: // We keep slev and elev as std::vector since they are small and used only // on the host. slev.resize(dim4d, 0); - elev.resize(dim4d, nlev); // Full vertical range (0 .. nlev-1) + elev.resize(dim4d, nlev - 1); // Full vertical range (0 .. nlev-1) } }; @@ -217,13 +217,13 @@ TYPED_TEST(HorizontalRotVertexTest, TestRotVertexAtmosRandom) { // Calculate reference values separately and verify results std::vector<TypeParam> ref_rot_vec(nproma * nlev * nblks_v, 0.0); - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_v_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, this->i_endblk, i_startidx, i_endidx); - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jv = i_startidx; jv < i_endidx; ++jv) { + for (int jk = this->slev[0]; jk <= this->elev[0]; ++jk) { + for (int jv = i_startidx; jv <= i_endidx; ++jv) { ref_rot_vec[rot_vec_at(jv, jk, jb)] = vec_e_h[vec_e_at(vert_edge_idx_h[vert_edge_at(jv, jb, 0)], jk, vert_edge_blk_h[vert_edge_at(jv, jb, 0)])] * @@ -406,13 +406,13 @@ TYPED_TEST(HorizontalRotVertexTest, TestRotVertexRIRandom) { // Calculate reference values separately and verify results std::vector<TypeParam> ref_rot_vec(nproma * nlev * nblks_v, 0.0); - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + for (int jb = this->i_startblk; jb <= 
this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_v_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, this->i_endblk, i_startidx, i_endidx); - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jv = i_startidx; jv < i_endidx; ++jv) { + for (int jk = this->slev[0]; jk <= this->elev[0]; ++jk) { + for (int jv = i_startidx; jv <= i_endidx; ++jv) { ref_rot_vec[rot_vec_at(jv, jk, jb)] = vec_e_h[vec_e_at(vert_edge_idx_h[vert_edge_at(jv, jb, 0)], jk, vert_edge_blk_h[vert_edge_at(jv, jb, 0)])] * -- GitLab From 5500a3b161f7a104feab233f3c7a488ec3b16c68 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 4 Apr 2025 16:48:00 +0200 Subject: [PATCH 13/34] modified the unit tests for interpolation_vector --- test/c/CMakeLists.txt | 2 +- test/c/test_interpolation_vector.cpp | 458 +++++++++++++++++++++------ 2 files changed, 369 insertions(+), 91 deletions(-) diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index 2dd32f4..9d21819 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -36,7 +36,7 @@ set(SOURCES test_horizontal_recon.cpp test_horizontal_rot.cpp # test_tdma_solver.cpp - # test_interpolation_vector.cpp + test_interpolation_vector.cpp # test_intp_rbf.cpp # test_interpolation_scalar.cpp ) diff --git a/test/c/test_interpolation_vector.cpp b/test/c/test_interpolation_vector.cpp index 680fb6e..fbab6ca 100644 --- a/test/c/test_interpolation_vector.cpp +++ b/test/c/test_interpolation_vector.cpp @@ -12,104 +12,382 @@ #include <Kokkos_Core.hpp> #include <gtest/gtest.h> #include <vector> +#include <random> #include "mo_lib_interpolation_vector.hpp" +#include "dim_helper.hpp" -// Dimensions for the test (small, trivial test). -// We assume Fortran ordering: column-major, but our C wrappers will wrap raw -// pointers into Kokkos::Views with LayoutLeft. 
-constexpr int nproma = 2; -constexpr int nlev = 3; -constexpr int nblks_e = 2; // For the edge arrays (p_vn_in, p_vt_in) -constexpr int nblks_c = 2; // For the cell arrays and interpolation coefficients - -// For the get_indices_c_lib inputs. -constexpr int i_startblk = 0; -constexpr int i_endblk = 1; // two blocks: indices 0 and 1 -constexpr int i_startidx_in = 0; -constexpr int i_endidx_in = nproma - 1; // 0 and 1 -constexpr int slev = 0; -constexpr int elev = nlev - 1; // 0 .. 2 - -// Helper to compute total number of elements for a 3D array stored in -// column-major order. -template <typename T> size_t num_elements(int dim1, int dim2, int dim3) { - return static_cast<size_t>(dim1) * dim2 * dim3; -} +/// Base test class for the edges2cells tests. Templated for the ValueType. +template <typename ValueType> +class Edges2CellsVectorTest : public ::testing::Test { +protected: + // Constant dimensions + static constexpr int nproma = 2; // inner loop length + static constexpr int nlev = 3; // number of vertical levels + static constexpr int nblks_e = 2; // number of edge blocks + static constexpr int nblks_c = 2; // number of cell blocks + static constexpr int num_edges = 3; // number of edges per cell + + // Parameter values + int i_startblk = 0; + int i_endblk = nblks_c - 1; // Test blocks [0 .. nblks_c-1] + int i_startidx_in = 0; + int i_endidx_in = nproma - 1; // Full range: 0 .. nproma-1 + int slev = 0; + int elev = nlev - 1; // Full vertical range (0 .. 
nlev-1) + + // Using Kokkos execution and memory spaces + using exec_space = Kokkos::DefaultExecutionSpace; + using memory_space = exec_space::memory_space; + + // Kokkos Views for test data + Kokkos::View<ValueType*, memory_space> p_vn_in; + Kokkos::View<ValueType*, memory_space> p_vt_in; + Kokkos::View<int*, memory_space> cell_edge_idx; + Kokkos::View<int*, memory_space> cell_edge_blk; + Kokkos::View<ValueType*, memory_space> e_bln_c_u; + Kokkos::View<ValueType*, memory_space> e_bln_c_v; + Kokkos::View<ValueType*, memory_space> p_u_out; + Kokkos::View<ValueType*, memory_space> p_v_out; + + Edges2CellsVectorTest() + : p_vn_in("p_vn_in", dim_combine(nproma, nlev, nblks_e)), + p_vt_in("p_vt_in", dim_combine(nproma, nlev, nblks_e)), + cell_edge_idx("cell_edge_idx", dim_combine(nproma, nblks_c, num_edges)), + cell_edge_blk("cell_edge_blk", dim_combine(nproma, nblks_c, num_edges)), + e_bln_c_u("e_bln_c_u", dim_combine(nproma, 6, nblks_c)), + e_bln_c_v("e_bln_c_v", dim_combine(nproma, 6, nblks_c)), + p_u_out("p_u_out", dim_combine(nproma, nlev, nblks_c)), + p_v_out("p_v_out", dim_combine(nproma, nlev, nblks_c)) + {} +}; + +/// ValueTypes to test with +typedef ::testing::Types<float, double> ValueTypes; + +TYPED_TEST_SUITE(Edges2CellsVectorTest, ValueTypes); -// Test for the double precision (dp) version. -TEST(Edges2CellsTest, DPTest) { - // Allocate and fill input arrays. - std::vector<double> p_vn_in(num_elements<double>(nproma, nlev, nblks_e), 1.0); - std::vector<double> p_vt_in(num_elements<double>(nproma, nlev, nblks_e), 1.0); - // cell_edge_idx and cell_edge_blk: dimensions [nproma, nblks_c, 3] - std::vector<int> cell_edge_idx(num_elements<int>(nproma, nblks_c, 3), 1); - std::vector<int> cell_edge_blk(num_elements<int>(nproma, nblks_c, 3), 1); - - // Here we set cell_edge_idx to 1, 2, 1 for every triple. 
- for (int i = 0; i < num_elements<int>(nproma, nblks_c, 3); i += 3) { - cell_edge_idx[i] = 1; - cell_edge_idx[i + 1] = 2; - cell_edge_idx[i + 2] = 1; +TYPED_TEST(Edges2CellsVectorTest, BasicTest) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_c = this->nblks_c; + constexpr int num_edges = this->num_edges; + + // Define indexing helpers + const auto &vn_at = at<nproma, nlev, nblks_e>; + const auto &vt_at = at<nproma, nlev, nblks_e>; + const auto &edge_idx_at = at<nproma, nblks_c, num_edges>; + const auto &edge_blk_at = at<nproma, nblks_c, num_edges>; + const auto &bln_at = at<nproma, 6, nblks_c>; + const auto &out_at = at<nproma, nlev, nblks_c>; + + // Create host mirror views + auto p_vn_in_h = Kokkos::create_mirror_view(this->p_vn_in); + auto p_vt_in_h = Kokkos::create_mirror_view(this->p_vt_in); + auto cell_edge_idx_h = Kokkos::create_mirror_view(this->cell_edge_idx); + auto cell_edge_blk_h = Kokkos::create_mirror_view(this->cell_edge_blk); + auto e_bln_c_u_h = Kokkos::create_mirror_view(this->e_bln_c_u); + auto e_bln_c_v_h = Kokkos::create_mirror_view(this->e_bln_c_v); + auto p_u_out_h = Kokkos::create_mirror_view(this->p_u_out); + auto p_v_out_h = Kokkos::create_mirror_view(this->p_v_out); + + // Initialize with simple values + for (int ib = 0; ib < nblks_e; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + p_vn_in_h[vn_at(ic, ik, ib)] = static_cast<TypeParam>(1.0 + ic + ik + ib); + p_vt_in_h[vt_at(ic, ik, ib)] = static_cast<TypeParam>(2.0 + ic + ik + ib); + } + } } - // Similarly, set cell_edge_blk to all ones (valid since nblks_e=2, so index 1 - // means block 0 after subtracting 1). 
e_bln_c_u and e_bln_c_v: dimensions - // [nproma, 6, nblks_c] - std::vector<double> e_bln_c_u(num_elements<double>(nproma, 6, nblks_c), 1.0); - std::vector<double> e_bln_c_v(num_elements<double>(nproma, 6, nblks_c), 1.0); - // Output arrays: dimensions [nproma, nlev, nblks_c] - std::vector<double> p_u_out(num_elements<double>(nproma, nlev, nblks_c), 0.0); - std::vector<double> p_v_out(num_elements<double>(nproma, nlev, nblks_c), 0.0); - - std::vector<double> p_u_ref(num_elements<double>(nproma, nlev, nblks_c), 6.0); - std::vector<double> p_v_ref(num_elements<double>(nproma, nlev, nblks_c), 6.0); - - // Call the dp (double precision) version. - edges2cells_vector_lib<double>( - p_vn_in.data(), p_vt_in.data(), cell_edge_idx.data(), - cell_edge_blk.data(), e_bln_c_u.data(), e_bln_c_v.data(), p_u_out.data(), - p_v_out.data(), i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, - elev, nproma, nlev, nblks_e, nblks_c); - - // Check that for each computed cell in p_u_out and p_v_out, the value is 6. - // This is because for each cell, the kernel adds 6 terms of 1*1. 
- for (size_t idx = 0; idx < p_u_out.size(); ++idx) { - EXPECT_NEAR(p_u_out[idx], p_u_ref[idx], 1e-12); - EXPECT_NEAR(p_v_out[idx], p_v_ref[idx], 1e-12); + + // Set each cell to connect to 3 edges + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + // Edge indices are 1-indexed in the function + cell_edge_idx_h[edge_idx_at(ic, ib, 0)] = 1; + cell_edge_idx_h[edge_idx_at(ic, ib, 1)] = 2; + cell_edge_idx_h[edge_idx_at(ic, ib, 2)] = 3; + + // Edge blocks are 1-indexed in the function + cell_edge_blk_h[edge_blk_at(ic, ib, 0)] = 1; + cell_edge_blk_h[edge_blk_at(ic, ib, 1)] = 1; + cell_edge_blk_h[edge_blk_at(ic, ib, 2)] = 1; + + // Initialize bilinear coefficients + for (int j = 0; j < 6; ++j) { + e_bln_c_u_h[bln_at(ic, j, ib)] = static_cast<TypeParam>(0.1 * (j + 1)); + e_bln_c_v_h[bln_at(ic, j, ib)] = static_cast<TypeParam>(0.05 * (j + 1)); + } + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + p_u_out_h[out_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + p_v_out_h[out_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + } + } + } + + // Copy to device + Kokkos::deep_copy(this->p_vn_in, p_vn_in_h); + Kokkos::deep_copy(this->p_vt_in, p_vt_in_h); + Kokkos::deep_copy(this->cell_edge_idx, cell_edge_idx_h); + Kokkos::deep_copy(this->cell_edge_blk, cell_edge_blk_h); + Kokkos::deep_copy(this->e_bln_c_u, e_bln_c_u_h); + Kokkos::deep_copy(this->e_bln_c_v, e_bln_c_v_h); + Kokkos::deep_copy(this->p_u_out, p_u_out_h); + Kokkos::deep_copy(this->p_v_out, p_v_out_h); + + Kokkos::fence(); + + // Call the function + edges2cells_vector_lib<TypeParam>( + this->p_vn_in.data(), this->p_vt_in.data(), + this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->e_bln_c_u.data(), this->e_bln_c_v.data(), + this->p_u_out.data(), this->p_v_out.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev, this->elev, nproma, nlev, nblks_e, nblks_c); + + Kokkos::fence(); + + // Copy results back to host + 
Kokkos::deep_copy(p_u_out_h, this->p_u_out); + Kokkos::deep_copy(p_v_out_h, this->p_v_out); + + // Compute expected results on host + using host_space = Kokkos::HostSpace; + Kokkos::View<TypeParam**, host_space> expected_u("expected_u", nproma, nlev); + Kokkos::View<TypeParam**, host_space> expected_v("expected_v", nproma, nlev); + + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { + // Compute expected values + expected_u(jc, jk) = + e_bln_c_u_h[bln_at(jc, 0, jb)] * + p_vn_in_h[vn_at(cell_edge_idx_h[edge_idx_at(jc, jb, 0)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 0)] - 1)] + + e_bln_c_u_h[bln_at(jc, 1, jb)] * + p_vt_in_h[vt_at(cell_edge_idx_h[edge_idx_at(jc, jb, 0)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 0)] - 1)] + + e_bln_c_u_h[bln_at(jc, 2, jb)] * + p_vn_in_h[vn_at(cell_edge_idx_h[edge_idx_at(jc, jb, 1)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 1)] - 1)] + + e_bln_c_u_h[bln_at(jc, 3, jb)] * + p_vt_in_h[vt_at(cell_edge_idx_h[edge_idx_at(jc, jb, 1)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 1)] - 1)] + + e_bln_c_u_h[bln_at(jc, 4, jb)] * + p_vn_in_h[vn_at(cell_edge_idx_h[edge_idx_at(jc, jb, 2)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 2)] - 1)] + + e_bln_c_u_h[bln_at(jc, 5, jb)] * + p_vt_in_h[vt_at(cell_edge_idx_h[edge_idx_at(jc, jb, 2)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 2)] - 1)]; + + expected_v(jc, jk) = + e_bln_c_v_h[bln_at(jc, 0, jb)] * + p_vn_in_h[vn_at(cell_edge_idx_h[edge_idx_at(jc, jb, 0)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 0)] - 1)] + + e_bln_c_v_h[bln_at(jc, 1, jb)] * + p_vt_in_h[vt_at(cell_edge_idx_h[edge_idx_at(jc, jb, 0)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 0)] - 1)] + + e_bln_c_v_h[bln_at(jc, 2, jb)] * + 
p_vn_in_h[vn_at(cell_edge_idx_h[edge_idx_at(jc, jb, 1)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 1)] - 1)] + + e_bln_c_v_h[bln_at(jc, 3, jb)] * + p_vt_in_h[vt_at(cell_edge_idx_h[edge_idx_at(jc, jb, 1)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 1)] - 1)] + + e_bln_c_v_h[bln_at(jc, 4, jb)] * + p_vn_in_h[vn_at(cell_edge_idx_h[edge_idx_at(jc, jb, 2)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 2)] - 1)] + + e_bln_c_v_h[bln_at(jc, 5, jb)] * + p_vt_in_h[vt_at(cell_edge_idx_h[edge_idx_at(jc, jb, 2)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 2)] - 1)]; + } + } + } + + // Verify results + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jc = this->i_startidx_in; jc <= this->i_endidx_in; ++jc) { + EXPECT_NEAR(p_u_out_h[out_at(jc, jk, jb)], expected_u(jc, jk), 1e-5) + << "u value mismatch at jc=" << jc << ", jk=" << jk; + EXPECT_NEAR(p_v_out_h[out_at(jc, jk, jb)], expected_v(jc, jk), 1e-5) + << "v value mismatch at jc=" << jc << ", jk=" << jk; + } + } } } -// Test for the single precision (sp) version. -TEST(Edges2CellsTest, SPTest) { - // Allocate and fill input arrays. - std::vector<float> p_vn_in(num_elements<float>(nproma, nlev, nblks_e), 1.0f); - std::vector<float> p_vt_in(num_elements<float>(nproma, nlev, nblks_e), 1.0f); - std::vector<int> cell_edge_idx(num_elements<int>(nproma, nblks_c, 3), 1); - std::vector<int> cell_edge_blk(num_elements<int>(nproma, nblks_c, 3), 1); - // Set cell_edge_idx values to 1, 2, 1. 
- for (int i = 0; i < num_elements<int>(nproma, nblks_c, 3); i += 3) { - cell_edge_idx[i] = 1; - cell_edge_idx[i + 1] = 2; - cell_edge_idx[i + 2] = 1; +TYPED_TEST(Edges2CellsVectorTest, RandomTest) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_c = this->nblks_c; + constexpr int num_edges = this->num_edges; + + // Define indexing helpers + const auto &vn_at = at<nproma, nlev, nblks_e>; + const auto &vt_at = at<nproma, nlev, nblks_e>; + const auto &edge_idx_at = at<nproma, nblks_c, num_edges>; + const auto &edge_blk_at = at<nproma, nblks_c, num_edges>; + const auto &bln_at = at<nproma, 6, nblks_c>; + const auto &out_at = at<nproma, nlev, nblks_c>; + + // Create host mirror views + auto p_vn_in_h = Kokkos::create_mirror_view(this->p_vn_in); + auto p_vt_in_h = Kokkos::create_mirror_view(this->p_vt_in); + auto cell_edge_idx_h = Kokkos::create_mirror_view(this->cell_edge_idx); + auto cell_edge_blk_h = Kokkos::create_mirror_view(this->cell_edge_blk); + auto e_bln_c_u_h = Kokkos::create_mirror_view(this->e_bln_c_u); + auto e_bln_c_v_h = Kokkos::create_mirror_view(this->e_bln_c_v); + auto p_u_out_h = Kokkos::create_mirror_view(this->p_u_out); + auto p_v_out_h = Kokkos::create_mirror_view(this->p_v_out); + + // Use fixed seed for reproducibility + std::mt19937 gen(42); + std::uniform_int_distribution<int> edge_distrib(1, num_edges); + std::uniform_int_distribution<int> block_distrib(1, nblks_e); + std::uniform_real_distribution<TypeParam> real_distrib(0.01, 1.0); + + // Initialize with random values + for (int ib = 0; ib < nblks_e; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + p_vn_in_h[vn_at(ic, ik, ib)] = real_distrib(gen); + p_vt_in_h[vt_at(ic, ik, ib)] = real_distrib(gen); + } + } + } + + // // Set each cell to connect to random edges + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + // Edge indices are 
1-indexed in the function + cell_edge_idx_h[edge_idx_at(ic, ib, 0)] = edge_distrib(gen); + cell_edge_idx_h[edge_idx_at(ic, ib, 1)] = edge_distrib(gen); + cell_edge_idx_h[edge_idx_at(ic, ib, 2)] = edge_distrib(gen); + + // Edge blocks are 1-indexed in the function + cell_edge_blk_h[edge_blk_at(ic, ib, 0)] = block_distrib(gen); + cell_edge_blk_h[edge_blk_at(ic, ib, 1)] = block_distrib(gen); + cell_edge_blk_h[edge_blk_at(ic, ib, 2)] = block_distrib(gen); + + // Initialize random bilinear coefficients + for (int j = 0; j < 6; ++j) { + e_bln_c_u_h[bln_at(ic, j, ib)] = real_distrib(gen); + e_bln_c_v_h[bln_at(ic, j, ib)] = real_distrib(gen); + } + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + p_u_out_h[out_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + p_v_out_h[out_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + } + } } - std::vector<float> e_bln_c_u(num_elements<float>(nproma, 6, nblks_c), 1.0f); - std::vector<float> e_bln_c_v(num_elements<float>(nproma, 6, nblks_c), 1.0f); - std::vector<float> p_u_out(num_elements<float>(nproma, nlev, nblks_c), 0.0f); - std::vector<float> p_v_out(num_elements<float>(nproma, nlev, nblks_c), 0.0f); - - std::vector<float> p_u_ref(num_elements<float>(nproma, nlev, nblks_c), 6.0f); - std::vector<float> p_v_ref(num_elements<float>(nproma, nlev, nblks_c), 6.0f); - - // Call the sp (float precision) version. - edges2cells_vector_lib<float>( - p_vn_in.data(), p_vt_in.data(), cell_edge_idx.data(), - cell_edge_blk.data(), e_bln_c_u.data(), e_bln_c_v.data(), p_u_out.data(), - p_v_out.data(), i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, - elev, nproma, nlev, nblks_e, nblks_c); - - // Verify that every computed output equals 6. 
- for (size_t idx = 0; idx < p_u_out.size(); ++idx) { - EXPECT_NEAR(p_u_out[idx], p_u_ref[idx], 1e-5f); - EXPECT_NEAR(p_v_out[idx], p_v_ref[idx], 1e-5f); + + // Copy to device + Kokkos::deep_copy(this->p_vn_in, p_vn_in_h); + Kokkos::deep_copy(this->p_vt_in, p_vt_in_h); + Kokkos::deep_copy(this->cell_edge_idx, cell_edge_idx_h); + Kokkos::deep_copy(this->cell_edge_blk, cell_edge_blk_h); + Kokkos::deep_copy(this->e_bln_c_u, e_bln_c_u_h); + Kokkos::deep_copy(this->e_bln_c_v, e_bln_c_v_h); + Kokkos::deep_copy(this->p_u_out, p_u_out_h); + Kokkos::deep_copy(this->p_v_out, p_v_out_h); + + Kokkos::fence(); + + // Call the function + edges2cells_vector_lib<TypeParam>( + this->p_vn_in.data(), this->p_vt_in.data(), + this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->e_bln_c_u.data(), this->e_bln_c_v.data(), + this->p_u_out.data(), this->p_v_out.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev, this->elev, nproma, nlev, nblks_e, nblks_c); + + Kokkos::fence(); + + // Copy results back to host + Kokkos::deep_copy(p_u_out_h, this->p_u_out); + Kokkos::deep_copy(p_v_out_h, this->p_v_out); + + // Compute expected results on host + using host_space = Kokkos::HostSpace; + Kokkos::View<TypeParam***, host_space> expected_u("expected_u", nproma, nlev, nblks_c); + Kokkos::View<TypeParam***, host_space> expected_v("expected_v", nproma, nlev, nblks_c); + + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { + // Compute expected values + expected_u(jc, jk, jb) = + e_bln_c_u_h[bln_at(jc, 0, jb)] * + p_vn_in_h[vn_at(cell_edge_idx_h[edge_idx_at(jc, jb, 0)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 0)] - 1)] + + e_bln_c_u_h[bln_at(jc, 1, jb)] * + 
p_vt_in_h[vt_at(cell_edge_idx_h[edge_idx_at(jc, jb, 0)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 0)] - 1)] + + e_bln_c_u_h[bln_at(jc, 2, jb)] * + p_vn_in_h[vn_at(cell_edge_idx_h[edge_idx_at(jc, jb, 1)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 1)] - 1)] + + e_bln_c_u_h[bln_at(jc, 3, jb)] * + p_vt_in_h[vt_at(cell_edge_idx_h[edge_idx_at(jc, jb, 1)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 1)] - 1)] + + e_bln_c_u_h[bln_at(jc, 4, jb)] * + p_vn_in_h[vn_at(cell_edge_idx_h[edge_idx_at(jc, jb, 2)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 2)] - 1)] + + e_bln_c_u_h[bln_at(jc, 5, jb)] * + p_vt_in_h[vt_at(cell_edge_idx_h[edge_idx_at(jc, jb, 2)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 2)] - 1)]; + + expected_v(jc, jk, jb) = + e_bln_c_v_h[bln_at(jc, 0, jb)] * + p_vn_in_h[vn_at(cell_edge_idx_h[edge_idx_at(jc, jb, 0)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 0)] - 1)] + + e_bln_c_v_h[bln_at(jc, 1, jb)] * + p_vt_in_h[vt_at(cell_edge_idx_h[edge_idx_at(jc, jb, 0)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 0)] - 1)] + + e_bln_c_v_h[bln_at(jc, 2, jb)] * + p_vn_in_h[vn_at(cell_edge_idx_h[edge_idx_at(jc, jb, 1)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 1)] - 1)] + + e_bln_c_v_h[bln_at(jc, 3, jb)] * + p_vt_in_h[vt_at(cell_edge_idx_h[edge_idx_at(jc, jb, 1)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 1)] - 1)] + + e_bln_c_v_h[bln_at(jc, 4, jb)] * + p_vn_in_h[vn_at(cell_edge_idx_h[edge_idx_at(jc, jb, 2)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 2)] - 1)] + + e_bln_c_v_h[bln_at(jc, 5, jb)] * + p_vt_in_h[vt_at(cell_edge_idx_h[edge_idx_at(jc, jb, 2)] - 1, jk, + cell_edge_blk_h[edge_blk_at(jc, jb, 2)] - 1)]; + } + } + } + + Kokkos::fence(); + + // Verify results + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jc = this->i_startidx_in; jc <= this->i_endidx_in; ++jc) { + EXPECT_NEAR(p_u_out_h[out_at(jc, jk, 0)], expected_u(jc, jk, 0), 1e-5) + << "u value 
mismatch at jc=" << jc << ", jk=" << jk; + EXPECT_NEAR(p_v_out_h[out_at(jc, jk, 0)], expected_v(jc, jk, 0), 1e-5) + << "v value mismatch at jc=" << jc << ", jk=" << jk; + } + } } } -- GitLab From fecec078c8572e6c0aebe86de7bdeb8368c22ccc Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 7 Apr 2025 21:29:36 +0200 Subject: [PATCH 14/34] fixed a bug in mo_lib_interpolation_scalar --- src/interpolation/mo_lib_interpolation_scalar.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index 8910cb2..51edcda 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -569,7 +569,7 @@ void cell_avg_lib(const T *psi_c, const int *cell_neighbor_idx, UnmanagedConstInt3D iblk_view(cell_neighbor_blk, nproma, nblks_c, 3); // cell_neighbour_blk // averaging coefficients, dim: (nproma,nlev,nblks_c) - UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma, nlev, nblks_c); + UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma, 4, nblks_c); // cell based variable after averaging, dim: (nproma,nlev,nblks_c) UnmanagedT3D avg_psi_c_view(avg_psi_c, nproma, nlev, nblks_c); -- GitLab From c6a39b5a36b8981f67495f2ea5f32f9af42a08c0 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 7 Apr 2025 21:30:40 +0200 Subject: [PATCH 15/34] corrected the way loop_exchange is defined in test CMake --- test/c/CMakeLists.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index 9d21819..b2bba98 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -26,9 +26,6 @@ message(CHECK_PASS "done") # Find Kokkos (or use your existing Kokkos installation) # find_package(Kokkos REQUIRED) -if(IM_ENABLE_LOOP_EXCHANGE) - target_compile_definitions(iconmath-interpolation PRIVATE __LOOP_EXCHANGE) -endif() set(SOURCES 
main.cpp @@ -43,6 +40,10 @@ set(SOURCES # Create the test executable from your test files, including main.cpp. add_executable(iconmath_test_c ${SOURCES}) +if(IM_ENABLE_LOOP_EXCHANGE) + target_compile_definitions(iconmath_test_c PRIVATE __LOOP_EXCHANGE) +endif() + target_include_directories(iconmath_test_c PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) # Link the test executable with GoogleTest and Kokkos. -- GitLab From aab7047dbdb8b2686c8b2b3dd04f0d54e9396d30 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 7 Apr 2025 21:31:27 +0200 Subject: [PATCH 16/34] renamed few unit-tests --- test/c/test_interpolation_vector.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/c/test_interpolation_vector.cpp b/test/c/test_interpolation_vector.cpp index fbab6ca..497b1d5 100644 --- a/test/c/test_interpolation_vector.cpp +++ b/test/c/test_interpolation_vector.cpp @@ -19,7 +19,7 @@ /// Base test class for the edges2cells tests. Templated for the ValueType. 
template <typename ValueType> -class Edges2CellsVectorTest : public ::testing::Test { +class InterpolationVectorTest : public ::testing::Test { protected: // Constant dimensions static constexpr int nproma = 2; // inner loop length @@ -50,7 +50,7 @@ protected: Kokkos::View<ValueType*, memory_space> p_u_out; Kokkos::View<ValueType*, memory_space> p_v_out; - Edges2CellsVectorTest() + InterpolationVectorTest() : p_vn_in("p_vn_in", dim_combine(nproma, nlev, nblks_e)), p_vt_in("p_vt_in", dim_combine(nproma, nlev, nblks_e)), cell_edge_idx("cell_edge_idx", dim_combine(nproma, nblks_c, num_edges)), @@ -65,9 +65,9 @@ protected: /// ValueTypes to test with typedef ::testing::Types<float, double> ValueTypes; -TYPED_TEST_SUITE(Edges2CellsVectorTest, ValueTypes); +TYPED_TEST_SUITE(InterpolationVectorTest, ValueTypes); -TYPED_TEST(Edges2CellsVectorTest, BasicTest) { +TYPED_TEST(InterpolationVectorTest, Edges2CellsSpecific) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_e = this->nblks_e; @@ -226,7 +226,7 @@ TYPED_TEST(Edges2CellsVectorTest, BasicTest) { } } -TYPED_TEST(Edges2CellsVectorTest, RandomTest) { +TYPED_TEST(InterpolationVectorTest, Edges2CellsRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_e = this->nblks_e; -- GitLab From b48e6d558213605da6fcb953f653405eb5dfdbbb Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 7 Apr 2025 21:32:08 +0200 Subject: [PATCH 17/34] change the random number generation in test_horizontal_rot --- test/c/test_horizontal_rot.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/c/test_horizontal_rot.cpp b/test/c/test_horizontal_rot.cpp index 92100e3..69e9d03 100644 --- a/test/c/test_horizontal_rot.cpp +++ b/test/c/test_horizontal_rot.cpp @@ -170,7 +170,7 @@ TYPED_TEST(HorizontalRotVertexTest, TestRotVertexAtmosRandom) { std::random_device rd; std::mt19937 gen(rd()); 
std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + std::uniform_real_distribution<TypeParam> real_distrib(-1.0, 1.0); // Initialization with random values for (int i = 0; i < nproma; ++i) { @@ -359,7 +359,7 @@ TYPED_TEST(HorizontalRotVertexTest, TestRotVertexRIRandom) { std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + std::uniform_real_distribution<TypeParam> real_distrib(-1.0, 1.0); // Initialization with random values for (int i = 0; i < nproma; ++i) { -- GitLab From c1af2bc238c574276c43e47b585e7a4b3a8cd43e Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 7 Apr 2025 21:32:59 +0200 Subject: [PATCH 18/34] Made an overhaul of test_interpolation_scalar removed normalization of the input arrays in test_interpolation_scalar --- test/c/CMakeLists.txt | 2 +- test/c/test_interpolation_scalar.cpp | 2075 ++++++++++++++++++++++---- 2 files changed, 1777 insertions(+), 300 deletions(-) diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index b2bba98..e707970 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -35,7 +35,7 @@ set(SOURCES # test_tdma_solver.cpp test_interpolation_vector.cpp # test_intp_rbf.cpp - # test_interpolation_scalar.cpp + test_interpolation_scalar.cpp ) # Create the test executable from your test files, including main.cpp. 
add_executable(iconmath_test_c ${SOURCES}) diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp index 507ec3f..94c33dd 100644 --- a/test/c/test_interpolation_scalar.cpp +++ b/test/c/test_interpolation_scalar.cpp @@ -10,18 +10,12 @@ // --------------------------------------------------------------- #include "mo_lib_interpolation_scalar.hpp" +#include "mo_lib_loopindices.hpp" #include <Kokkos_Core.hpp> #include <gtest/gtest.h> #include <vector> - -// Free-function helpers for 3D and 4D array sizes (assumed column-major) -template <typename T> size_t num_elements_3d(int d1, int d2, int d3) { - return static_cast<size_t>(d1) * d2 * d3; -} - -template <typename T> size_t num_elements_4d(int d1, int d2, int d3, int d4) { - return static_cast<size_t>(d1) * d2 * d3 * d4; -} +#include <random> +#include "dim_helper.hpp" // Define a helper struct that holds the two types. template <typename InT, typename OutT> struct MixedPrecision { @@ -44,116 +38,99 @@ typedef ::testing::Types<MixedPrecision<double, double>, class interp_dimensions { public: // Constant dimensions. - static constexpr int nproma = 16; // inner loop length - static constexpr int nlev = 7; // number of vertical levels + static constexpr int nproma = 2; // inner loop length + static constexpr int nlev = 3; // number of vertical levels static constexpr int nblks_c = 2; // number of cell blocks static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) static constexpr int nblks_v = 2; // number of vertex blocks + static constexpr int cell_type = 6; + static constexpr int npromz_c = 2; + // Parameter values. const int i_startblk = 0; - const int i_endblk = 1; // Test blocks [0, 1] - const int i_startidx = 2; - const int i_endidx = nproma - 3; // Partial range: 2 .. nproma-3 - const int slev = 1; + const int i_endblk = nblks_c - 1; // Test blocks [0, 1] + const int i_startidx = 0; + const int i_endidx = nproma - 1; // Partial range: 2 .. 
nproma-3 + const int slev = 0; const int elev = nlev - 1; // Partial vertical range (1 .. nlev-1) const bool lacc = false; // Not using ACC-specific behavior. const bool acc_async = false; // No asynchronous execution. }; -template <typename T> -class InterpolationScalarTypedTestFixture : public ::testing::Test, +template <typename ValueType> +class InterpolationScalarSingleParamTest : public ::testing::Test, public interp_dimensions { -public: - // Arrays used for verts2edges - std::vector<T> p_vertex_in; // Dimensions: (nproma, nlev, nblks_v) - std::vector<int> edge_vertex_idx; // Dimensions: (nproma, nblks_e, 4) - std::vector<int> edge_vertex_blk; // Dimensions: (nproma, nblks_e, 4) - std::vector<T> coeff_int_edges; // Dimensions: (nproma, 2, nblks_e) - std::vector<T> p_edge_out; // Dimensions: (nproma, nlev, nblks_e) - - // Arrays used for edges2verts - std::vector<T> p_edge_in; // Dimensions: (nproma, nlev, nblks_e) - std::vector<int> edge_vert_idx; // Dimensions: (nproma, nblks_e, 6) - std::vector<int> edge_vert_blk; // Dimensions: (nproma, nblks_e, 6) - std::vector<T> v_int; // Dimensions: (nproma, 6, nblks_v) - std::vector<T> p_vert_out; // Dimensions: (nproma, nlev, nblks_v) - - // Arrays used for edges2cells - // std::vector<T> p_edge_in; // Dimensions: (nproma, nlev, nblks_e) - std::vector<int> edge_idx; // Dimensions: (nproma, nblks_c, 3) - std::vector<int> edge_blk; // Dimensions: (nproma, nblks_c, 3) - std::vector<T> coeff_int_cells; // Dimensions: (nproma, 3, nblks_c) - std::vector<T> p_cell_out; // Dimensions: (nproma, nlev, nblks_c) + protected: + + // Using Kokkos execution and memory spaces + using exec_space = Kokkos::DefaultExecutionSpace; + using memory_space = exec_space::memory_space; + + Kokkos::View<ValueType*, memory_space> p_vertex_in; + Kokkos::View<int*, memory_space> edge_vertex_idx; + Kokkos::View<int*, memory_space> edge_vertex_blk; + Kokkos::View<ValueType*, memory_space> coeff_int_edges; + Kokkos::View<ValueType*, memory_space> 
p_edge_out; + + // // Arrays used for edges2verts + Kokkos::View<ValueType*, memory_space> p_edge_in; + Kokkos::View<int*, memory_space> edge_vert_idx; + Kokkos::View<int*, memory_space> edge_vert_blk; + Kokkos::View<ValueType*, memory_space> v_int; + Kokkos::View<ValueType*, memory_space> p_vert_out; + + // // Arrays used for edges2cells + Kokkos::View<int*, memory_space> edge_idx; // Dimensions: (nproma, nblks_c, 3) + Kokkos::View<int*, memory_space> edge_blk; // Dimensions: (nproma, nblks_c, 3) + Kokkos::View<ValueType*, memory_space> coeff_int_cells; // Dimensions: (nproma, 3, nblks_c) + Kokkos::View<ValueType*, memory_space> p_cell_out; // Dimensions: (nproma, nlev, nblks_c) // Arrays used for verts2cells - std::vector<T> p_vert_in; // Dimensions: (nproma, nlev, nblks_v) - std::vector<int> cell_index_idx; // Dimensions: (nproma, nblks_c, 3) - std::vector<int> cell_index_blk; // Dimensions: (nproma, nblks_c, 3) + Kokkos::View<ValueType*, memory_space> p_vert_in; // Dimensions: (nproma, nlev, nblks_v) + Kokkos::View<int*, memory_space> cell_index_idx; // Dimensions: (nproma, nblks_c, 3) + Kokkos::View<int*, memory_space> cell_index_blk; // Dimensions: (nproma, nblks_c, 3) // Arrays used for avg_lib - std::vector<T> psi_c; // Dimensions: (nproma, nlev, nblks_c) - std::vector<int> cell_neighbor_idx; // Dimensions: (nproma, nblks_c, 3) - std::vector<int> cell_neighbor_blk; // Dimensions: (nproma, nblks_c, 3) - std::vector<T> avg_coeff; // Dimensions: (nproma, nlev, nblks_c) - std::vector<T> avg_psi_c; // Dimensions: (nproma, nlev, nblks_c) - - const int cell_type = 6; - const int npromz_c = 32; - - InterpolationScalarTypedTestFixture() { - // Allocate and initialize arrays needed for verts2edges - p_vertex_in.resize(num_elements_3d<T>(nproma, nlev, nblks_v), - static_cast<T>(1)); - edge_vertex_idx.resize(num_elements_3d<int>(nproma, nblks_e, 4), 1); - edge_vertex_blk.resize(num_elements_3d<int>(nproma, nblks_e, 4), 0); - 
coeff_int_edges.resize(num_elements_3d<T>(nproma, 2, nblks_e), - static_cast<T>(1)); - - p_edge_out.resize(num_elements_3d<T>(nproma, nlev, nblks_e), - static_cast<T>(0)); - - // Allocate & Initialize arrays needed for edges2verts - p_edge_in.resize(num_elements_3d<T>(nproma, nlev, nblks_e), - static_cast<T>(1)); - edge_vert_idx.resize(num_elements_3d<int>(nproma, nblks_e, 6), 1); - edge_vert_blk.resize(num_elements_3d<int>(nproma, nblks_e, 6), 0); - v_int.resize(num_elements_3d<T>(nproma, 6, nblks_v), static_cast<T>(1)); - - p_vert_out.resize(num_elements_3d<T>(nproma, nlev, nblks_v), - static_cast<T>(0)); - - // Allocate & Initialize arrays needed for edges2cells - edge_idx.resize(num_elements_3d<int>(nproma, nblks_c, 3), 1); - edge_blk.resize(num_elements_3d<int>(nproma, nblks_c, 3), 0); - coeff_int_cells.resize(num_elements_3d<T>(nproma, 3, nblks_c), - static_cast<T>(1)); - - p_cell_out.resize(num_elements_3d<T>(nproma, nlev, nblks_c), - static_cast<T>(0)); - - // Allocate and initialize arrays needed for verts2cells - p_vert_in.resize(num_elements_3d<T>(nproma, nlev, nblks_v), - static_cast<T>(1)); - cell_index_idx.resize(num_elements_3d<int>(nproma, nblks_c, 3), 1); - cell_index_blk.resize(num_elements_3d<int>(nproma, nblks_c, 3), 0); - - // Allocate and initialize arrays needed for avg_lib - psi_c.resize(num_elements_3d<T>(nproma, nlev, nblks_c), static_cast<T>(1)); - cell_neighbor_idx.resize(num_elements_3d<int>(nproma, nblks_c, 3), 1); - cell_neighbor_blk.resize(num_elements_3d<int>(nproma, nblks_c, 3), 0); - avg_coeff.resize(num_elements_3d<T>(nproma, nlev, nblks_c), - static_cast<T>(1)); - - // Allocate output arrays and initialize to zero. 
- avg_psi_c.resize(num_elements_3d<T>(nproma, nlev, nblks_c), - static_cast<T>(0)); - } + Kokkos::View<ValueType*, memory_space> psi_c; // Dimensions: (nproma, nlev, nblks_c) + Kokkos::View<int*, memory_space> cell_neighbor_idx; // Dimensions: (nproma, nblks_c, 3) + Kokkos::View<int*, memory_space> cell_neighbor_blk; // Dimensions: (nproma, nblks_c, 3) + Kokkos::View<ValueType*, memory_space> avg_coeff; // Dimensions: (nproma, 4, nblks_c) + Kokkos::View<ValueType*, memory_space> avg_psi_c; // Dimensions: (nproma, nlev, nblks_c) + + InterpolationScalarSingleParamTest() + : p_vertex_in("p_vertex_in", nproma * nlev * nblks_v), + edge_vertex_idx("edge_vertex_idx", nproma * nblks_e * 4), + edge_vertex_blk("edge_vertex_blk", nproma * nblks_e * 4), + coeff_int_edges("coeff_int_edges", nproma * 2 * nblks_e), + p_edge_out("p_edge_out", nproma * nlev * nblks_e), + + p_edge_in("p_edge_in", nproma * nlev * nblks_e), + edge_vert_idx("edge_vert_idx", nproma * nblks_e * 6), + edge_vert_blk("edge_vert_blk", nproma * nblks_e * 6), + v_int("v_int", nproma * 6 * nblks_v), + p_vert_out("p_vert_out", nproma * nlev * nblks_v), + + edge_idx("edge_idx", nproma * nblks_c * 3), + edge_blk("edge_blk", nproma * nblks_c * 3), + coeff_int_cells("coeff_int_cells", nproma * 3 * nblks_c), + p_cell_out("p_cell_out", nproma * nlev * nblks_c), + + p_vert_in("p_vert_in", nproma * nlev * nblks_v), + cell_index_idx("cell_index_idx", nproma * nblks_c * 3), + cell_index_blk("cell_index_blk", nproma * nblks_c * 3), + + psi_c("psi_c", nproma * nlev * nblks_c), + cell_neighbor_idx("cell_neighbor_idx", nproma * nblks_c * 3), + cell_neighbor_blk("cell_neighbor_blk", nproma * nblks_c * 3), + avg_coeff("avg_coeff", nproma * 4 * nblks_c), // 4 coefficients (self + 3 neighbors) + avg_psi_c("avg_psi_c", nproma * nlev * nblks_c) + {} }; typedef ::testing::Types<float, double> SingleType; -TYPED_TEST_SUITE(InterpolationScalarTypedTestFixture, SingleType); +TYPED_TEST_SUITE(InterpolationScalarSingleParamTest, 
SingleType); //////////////////////////////////////////////////////////////////////////////// // @@ -161,29 +138,206 @@ TYPED_TEST_SUITE(InterpolationScalarTypedTestFixture, SingleType); // //////////////////////////////////////////////////////////////////////////////// -TYPED_TEST(InterpolationScalarTypedTestFixture, Verts2Edges) { +TYPED_TEST(InterpolationScalarSingleParamTest, Verts2EdgesSpecific) { + + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_v = this->nblks_v; + constexpr int nblks_e = this->nblks_e; + + // Define indexing helpers + const auto &vertex_at = at<nproma, nlev, nblks_v>; + const auto &idx_at = at<nproma, nblks_e, 4>; + const auto &blk_at = at<nproma, nblks_e, 4>; + const auto &coeff_at = at<nproma, 2, nblks_e>; + const auto &edge_at = at<nproma, nlev, nblks_e>; + + // Create host mirror views + auto p_vertex_in_h = Kokkos::create_mirror_view(this->p_vertex_in); + auto edge_vertex_idx_h = Kokkos::create_mirror_view(this->edge_vertex_idx); + auto edge_vertex_blk_h = Kokkos::create_mirror_view(this->edge_vertex_blk); + auto coeff_int_edges_h = Kokkos::create_mirror_view(this->coeff_int_edges); + auto p_edge_out_h = Kokkos::create_mirror_view(this->p_edge_out); + + // Initialize with specific test values + for (int ib = 0; ib < nblks_v; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + p_vertex_in_h[vertex_at(ic, ik, ib)] = static_cast<TypeParam>(1.0 + ic + ik * 0.1 + ib * 0.01);; + } + } + } + + // Initialize edge connectivity indices with specific pattern + for (int ib = 0; ib < nblks_e; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + // Each edge connects to two specific vertices + edge_vertex_idx_h[idx_at(ic, ib, 0)] = ic % (nproma - 1); // First vertex index + edge_vertex_idx_h[idx_at(ic, ib, 1)] = (ic + 1) % nproma; // Second vertex index + edge_vertex_idx_h[idx_at(ic, ib, 2)] = 0; // Not used + edge_vertex_idx_h[idx_at(ic, ib, 3)] = 0; // Not used + + 
edge_vertex_blk_h[blk_at(ic, ib, 0)] = ib % nblks_v; // First vertex block + edge_vertex_blk_h[blk_at(ic, ib, 1)] = (ib + 1) % nblks_v; // Second vertex block + edge_vertex_blk_h[blk_at(ic, ib, 2)] = 0; // Not used + edge_vertex_blk_h[blk_at(ic, ib, 3)] = 0; // Not used + + coeff_int_edges_h[coeff_at(ic, 0, ib)] = static_cast<TypeParam>(0.5 + ic * 0.01); + coeff_int_edges_h[coeff_at(ic, 1, ib)] = static_cast<TypeParam>(0.5 + ib * 0.01); + + // Initialize output to zero and calculate expected results + for (int ik = 0; ik < nlev; ++ik) { + p_edge_out_h[edge_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + } + } + } + + // copy data to device + Kokkos::deep_copy(this->p_vertex_in, p_vertex_in_h); + Kokkos::deep_copy(this->edge_vertex_idx, edge_vertex_idx_h); + Kokkos::deep_copy(this->edge_vertex_blk, edge_vertex_blk_h); + Kokkos::deep_copy(this->coeff_int_edges, coeff_int_edges_h); + Kokkos::deep_copy(this->p_edge_out, p_edge_out_h); verts2edges_scalar_lib<TypeParam>( this->p_vertex_in.data(), this->edge_vertex_idx.data(), this->edge_vertex_blk.data(), this->coeff_int_edges.data(), this->p_edge_out.data(), this->i_startblk, this->i_endblk, - this->i_startidx, this->i_endidx, this->slev, this->elev, this->nproma, - this->nlev, this->nblks_v, this->nblks_e, this->lacc); - - // Check the outputs only for blocks in the range - // { [i_startblk, i_endblk], [slev,elev], [i_startidx, i_endidx] } - for (int block = this->i_startblk; block <= this->i_endblk; ++block) { - for (int level = this->slev; level < this->elev; ++level) { - for (int i = this->i_startidx; i < this->i_endidx; ++i) { - // Compute the linear index for a 3D array in column-major order: - size_t idx = - i + level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 2 stencil points, - // expect 2. 
- EXPECT_NEAR(this->p_edge_out[idx], static_cast<TypeParam>(2), - static_cast<TypeParam>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " - << i; + this->i_startidx, this->i_endidx, this->slev, this->elev, nproma, + nlev, nblks_v, nblks_e, this->lacc); + + // Copy results back to host + Kokkos::deep_copy(p_edge_out_h, this->p_edge_out); + + // Expected results based on the specific test values + std::vector<TypeParam> expected_edges(12); + int idx = 0; + std::generate(expected_edges.begin(), expected_edges.end(), [&idx]() { + TypeParam values[] = { + 1.505, 1.015, 1.605, 1.116, 1.705, 1.217, + 1.525, 1.0251, 1.626, 1.1271, 1.727, 1.2291 + }; + return values[idx++]; + }); + + // Verify results + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jv = this->i_startidx; jv <= this->i_endidx; ++jv) { + EXPECT_NEAR(p_edge_out_h[edge_at(jv, jk, jb)], + expected_edges[edge_at(jv, jk, jb)], + static_cast<TypeParam>(1e-5)) + << "Failure at block " << jb << ", level " << jk << ", index " << jv; + } + } + } +} + +// Repeat the same test with randomized data +TYPED_TEST(InterpolationScalarSingleParamTest, Verts2EdgesRandom) { + + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_v = this->nblks_v; + constexpr int nblks_e = this->nblks_e; + + // Define indexing helpers + const auto &vertex_at = at<nproma, nlev, nblks_v>; + const auto &idx_at = at<nproma, nblks_e, 4>; + const auto &blk_at = at<nproma, nblks_e, 4>; + const auto &coeff_at = at<nproma, 2, nblks_e>; + const auto &edge_at = at<nproma, nlev, nblks_e>; + + // Create host mirror views + auto p_vertex_in_h = Kokkos::create_mirror_view(this->p_vertex_in); + auto edge_vertex_idx_h = Kokkos::create_mirror_view(this->edge_vertex_idx); + auto edge_vertex_blk_h = Kokkos::create_mirror_view(this->edge_vertex_blk); + auto coeff_int_edges_h = 
Kokkos::create_mirror_view(this->coeff_int_edges); + auto p_edge_out_h = Kokkos::create_mirror_view(this->p_edge_out); + + // Use fixed seed for reproducibility + std::mt19937 gen(42); + std::uniform_int_distribution<int> edge_distrib(0, nproma - 1); + std::uniform_int_distribution<int> block_distrib(0, nblks_v - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.01, 1.0); + + // Initialize with random values + for (int ib = 0; ib < nblks_v; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + p_vertex_in_h[vertex_at(ic, ik, ib)] = real_distrib(gen); + } + } + } + + // Initialize edge connectivity indices with random values + for (int ib = 0; ib < nblks_e; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + edge_vertex_idx_h[idx_at(ic, ib, 0)] = edge_distrib(gen); + edge_vertex_idx_h[idx_at(ic, ib, 1)] = edge_distrib(gen); + edge_vertex_idx_h[idx_at(ic, ib, 2)] = edge_distrib(gen); + edge_vertex_idx_h[idx_at(ic, ib, 3)] = edge_distrib(gen); + + edge_vertex_blk_h[blk_at(ic, ib, 0)] = block_distrib(gen); + edge_vertex_blk_h[blk_at(ic, ib, 1)] = block_distrib(gen); + edge_vertex_blk_h[blk_at(ic, ib, 2)] = block_distrib(gen); + edge_vertex_blk_h[blk_at(ic, ib, 3)] = block_distrib(gen); + + coeff_int_edges_h[coeff_at(ic, 0, ib)] = real_distrib(gen); + coeff_int_edges_h[coeff_at(ic, 1, ib)] = real_distrib(gen); + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + p_edge_out_h[edge_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + } + } + } + + // copy data to device + Kokkos::deep_copy(this->p_vertex_in, p_vertex_in_h); + Kokkos::deep_copy(this->edge_vertex_idx, edge_vertex_idx_h); + Kokkos::deep_copy(this->edge_vertex_blk, edge_vertex_blk_h); + Kokkos::deep_copy(this->coeff_int_edges, coeff_int_edges_h); + Kokkos::deep_copy(this->p_edge_out, p_edge_out_h); + + // Call the function + verts2edges_scalar_lib<TypeParam>( + this->p_vertex_in.data(), this->edge_vertex_idx.data(), + this->edge_vertex_blk.data(), 
this->coeff_int_edges.data(), + this->p_edge_out.data(), this->i_startblk, this->i_endblk, + this->i_startidx, this->i_endidx, this->slev, this->elev, nproma, + nlev, nblks_v, nblks_e, this->lacc); + + // Copy results back to host + Kokkos::deep_copy(p_edge_out_h, this->p_edge_out); + + // Prepare expected results storage + using host_space = Kokkos::HostSpace; + Kokkos::View<TypeParam***, host_space> expected_edges("expected_edges", nproma, nlev, nblks_e); + + for (int ib = this->i_startblk; ib <= this->i_endblk; ++ib) { + for (int ik = this->slev; ik <= this->elev; ++ik) { + for (int ic = this->i_startidx; ic <= this->i_endidx; ++ic) { + // Compute expected values + expected_edges(ic, ik, ib) = + coeff_int_edges_h[coeff_at(ic, 0, ib)] * + p_vertex_in_h[vertex_at(edge_vertex_idx_h[idx_at(ic, ib, 0)], ik, + edge_vertex_blk_h[blk_at(ic, ib, 0)])] + + coeff_int_edges_h[coeff_at(ic, 1, ib)] * + p_vertex_in_h[vertex_at(edge_vertex_idx_h[idx_at(ic, ib, 1)], ik, + edge_vertex_blk_h[blk_at(ic, ib, 1)])]; + } + } + } + + // Verify results + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jv = this->i_startidx; jv <= this->i_endidx; ++jv) { + EXPECT_NEAR(p_edge_out_h[edge_at(jv, jk, jb)], + expected_edges(jv, jk, jb), + static_cast<TypeParam>(1e-5)) + << "Failure at block " << jb << ", level " << jk << ", index " << jv; } } } @@ -195,29 +349,199 @@ TYPED_TEST(InterpolationScalarTypedTestFixture, Verts2Edges) { // //////////////////////////////////////////////////////////////////////////////// -TYPED_TEST(InterpolationScalarTypedTestFixture, Edges2Verts) { +TYPED_TEST(InterpolationScalarSingleParamTest, Edges2VertsSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + // Define indexing helpers + const auto &edge_at = at<nproma, nlev, nblks_e>; + const auto &idx_at = at<nproma, 
nblks_v, 6>; + const auto &blk_at = at<nproma, nblks_v, 6>; + const auto &coeff_at = at<nproma, 6, nblks_v>; + const auto &vert_at = at<nproma, nlev, nblks_v>; + + // Create host mirror views + auto p_edge_in_h = Kokkos::create_mirror_view(this->p_edge_in); + auto edge_vert_idx_h = Kokkos::create_mirror_view(this->edge_vert_idx); + auto edge_vert_blk_h = Kokkos::create_mirror_view(this->edge_vert_blk); + auto v_int_h = Kokkos::create_mirror_view(this->v_int); + auto p_vert_out_h = Kokkos::create_mirror_view(this->p_vert_out); + + // Initialize with index-based test values + for (int ib = 0; ib < nblks_e; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + // Value depends on all three indices + p_edge_in_h[edge_at(ic, ik, ib)] = static_cast<TypeParam>(1.0 + ic + ik * 0.1 + ib * 0.01); + } + } + } + + // Initialize vertex connectivity indices with specific pattern + for (int ib = 0; ib < nblks_v; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + // Each vertex connects to 6 edges + for (int j = 0; j < 6; ++j) { + // Edge indices with a pattern + edge_vert_idx_h[idx_at(ic, ib, j)] = (ic + j) % nproma; + edge_vert_blk_h[blk_at(ic, ib, j)] = (ib + j % 2) % nblks_e; + + // Interpolation coefficients that depend on indices + v_int_h[coeff_at(ic, j, ib)] = static_cast<TypeParam>(1.0 / 6.0 + j * 0.01); +} + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + p_vert_out_h[vert_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + } + } + } + // Copy to device + Kokkos::deep_copy(this->p_edge_in, p_edge_in_h); + Kokkos::deep_copy(this->edge_vert_idx, edge_vert_idx_h); + Kokkos::deep_copy(this->edge_vert_blk, edge_vert_blk_h); + Kokkos::deep_copy(this->v_int, v_int_h); + Kokkos::deep_copy(this->p_vert_out, p_vert_out_h); + + // Call the function under test edges2verts_scalar_lib<TypeParam>( this->p_edge_in.data(), this->edge_vert_idx.data(), - this->edge_vert_blk.data(), this->v_int.data(), this->p_vert_out.data(), - 
this->i_startblk, this->i_endblk, this->i_startidx, this->i_endidx, - this->slev, this->elev, this->nproma, this->nlev, this->nblks_e, - this->nblks_v, this->lacc); - - // Check the outputs only for blocks in the range - // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } - for (int block = this->i_startblk; block <= this->i_endblk; ++block) { - for (int level = this->slev; level < this->elev; ++level) { - for (int i = this->i_startidx; i < this->i_endidx; ++i) { - // Compute the linear index for a 3D array in column-major order: - size_t idx = - i + level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 6 stencil points, - // expect 6. - EXPECT_NEAR(this->p_vert_out[idx], static_cast<TypeParam>(6), - static_cast<TypeParam>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " - << i; + this->edge_vert_blk.data(), this->v_int.data(), + this->p_vert_out.data(), this->i_startblk, this->i_endblk, + this->i_startidx, this->i_endidx, this->slev, this->elev, + nproma, nlev, nblks_e, nblks_v, this->lacc); + + // Copy results back to host + Kokkos::deep_copy(p_vert_out_h, this->p_vert_out); + + // Expected results based on the specific test values + std::vector<TypeParam> expected_verts(12); + int idx = 0; + std::generate(expected_verts.begin(), expected_verts.end(), [&idx]() { + TypeParam values[] = { + 1.7459, 1.7159, 1.8609, 1.8309, 1.9759, 1.9459, + 1.7456, 1.7156, 1.8606, 1.8306, 1.9756, 1.9456 + }; + return values[idx++]; + }); + + // Verify results + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jv = this->i_startidx; jv <= this->i_endidx; ++jv) { + EXPECT_NEAR(p_vert_out_h[vert_at(jv, jk, jb)], + expected_verts[vert_at(jv, jk, jb)], + static_cast<TypeParam>(1e-5)) + << "Failure at block " << jb << ", level " << jk << ", index " << jv; + } + } + } +} + 
+TYPED_TEST(InterpolationScalarSingleParamTest, Edges2VertsRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + // Define indexing helpers + const auto &edge_at = at<nproma, nlev, nblks_e>; + const auto &idx_at = at<nproma, nblks_v, 6>; + const auto &blk_at = at<nproma, nblks_v, 6>; + const auto &coeff_at = at<nproma, 6, nblks_v>; + const auto &vert_at = at<nproma, nlev, nblks_v>; + + // Create host mirror views + auto p_edge_in_h = Kokkos::create_mirror_view(this->p_edge_in); + auto edge_vert_idx_h = Kokkos::create_mirror_view(this->edge_vert_idx); + auto edge_vert_blk_h = Kokkos::create_mirror_view(this->edge_vert_blk); + auto v_int_h = Kokkos::create_mirror_view(this->v_int); + auto p_vert_out_h = Kokkos::create_mirror_view(this->p_vert_out); + + // Use fixed seed for reproducibility + std::mt19937 gen(42); + std::uniform_int_distribution<int> edge_distrib(0, nproma - 1); + std::uniform_int_distribution<int> block_distrib(0, nblks_e - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.01, 1.0); + + // Initialize with random values + for (int ib = 0; ib < nblks_e; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + p_edge_in_h[edge_at(ic, ik, ib)] = real_distrib(gen); + } + } + } + + // Initialize vertex connectivity indices with random values + for (int ib = 0; ib < nblks_v; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + // Each vertex connects to 6 edges + for (int j = 0; j < 6; ++j) { + edge_vert_idx_h[idx_at(ic, ib, j)] = edge_distrib(gen); + edge_vert_blk_h[blk_at(ic, ib, j)] = block_distrib(gen); + + // Random interpolation coefficients + v_int_h[coeff_at(ic, j, ib)] = real_distrib(gen) / 6.0; // Scaled to ensure reasonable sums + } + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + p_vert_out_h[vert_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + } + } + } + + // Copy 
to device + Kokkos::deep_copy(this->p_edge_in, p_edge_in_h); + Kokkos::deep_copy(this->edge_vert_idx, edge_vert_idx_h); + Kokkos::deep_copy(this->edge_vert_blk, edge_vert_blk_h); + Kokkos::deep_copy(this->v_int, v_int_h); + Kokkos::deep_copy(this->p_vert_out, p_vert_out_h); + + // Call the function under test + edges2verts_scalar_lib<TypeParam>( + this->p_edge_in.data(), this->edge_vert_idx.data(), + this->edge_vert_blk.data(), this->v_int.data(), + this->p_vert_out.data(), this->i_startblk, this->i_endblk, + this->i_startidx, this->i_endidx, this->slev, this->elev, + nproma, nlev, nblks_e, nblks_v, this->lacc); + + // Copy results back to host + Kokkos::deep_copy(p_vert_out_h, this->p_vert_out); + + // Prepare expected results storage + using host_space = Kokkos::HostSpace; + Kokkos::View<TypeParam***, host_space> expected_verts("expected_verts", nproma, nlev, nblks_v); + + // Compute expected values + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jv = this->i_startidx; jv <= this->i_endidx; ++jv) { + expected_verts(jv, jk, jb) = 0.0; + + for (int j = 0; j < 6; ++j) { + int edge_idx = edge_vert_idx_h[idx_at(jv, jb, j)]; + int edge_blk = edge_vert_blk_h[blk_at(jv, jb, j)]; + TypeParam coeff = v_int_h[coeff_at(jv, j, jb)]; + + expected_verts(jv, jk, jb) += coeff * p_edge_in_h[edge_at(edge_idx, jk, edge_blk)]; + } + } + } + } + + // Verify results + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jv = this->i_startidx; jv <= this->i_endidx; ++jv) { + EXPECT_NEAR(p_vert_out_h[vert_at(jv, jk, jb)], + expected_verts(jv, jk, jb), + static_cast<TypeParam>(1e-5)) + << "Failure at block " << jb << ", level " << jk << ", index " << jv; } } } @@ -229,55 +553,409 @@ TYPED_TEST(InterpolationScalarTypedTestFixture, Edges2Verts) { // //////////////////////////////////////////////////////////////////////////////// 
-TYPED_TEST(InterpolationScalarTypedTestFixture, Edges2Cells) { +TYPED_TEST(InterpolationScalarSingleParamTest, Edges2CellsSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_c = this->nblks_c; + + // Define indexing helpers + const auto &edge_at = at<nproma, nlev, nblks_e>; + const auto &idx_at = at<nproma, nblks_c, 3>; + const auto &blk_at = at<nproma, nblks_c, 3>; + const auto &coeff_at = at<nproma, 3, nblks_c>; + const auto &cell_at = at<nproma, nlev, nblks_c>; + + // Create host mirror views + auto p_edge_in_h = Kokkos::create_mirror_view(this->p_edge_in); + auto edge_idx_h = Kokkos::create_mirror_view(this->edge_idx); + auto edge_blk_h = Kokkos::create_mirror_view(this->edge_blk); + auto coeff_int_cells_h = Kokkos::create_mirror_view(this->coeff_int_cells); + auto p_cell_out_h = Kokkos::create_mirror_view(this->p_cell_out); + + // Initialize with index-based test values + for (int ib = 0; ib < nblks_e; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + // Value depends on all three indices + p_edge_in_h[edge_at(ic, ik, ib)] = static_cast<TypeParam>(1.0 + ic + ik * 0.1 + ib * 0.01); + } + } + } + // Initialize cell connectivity indices with specific pattern + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + // Each cell connects to 3 edges + for (int j = 0; j < 3; ++j) { + // Edge indices with a pattern + edge_idx_h[idx_at(ic, ib, j)] = (ic + j) % nproma; + edge_blk_h[blk_at(ic, ib, j)] = (ib + j % 2) % nblks_e; + + // Interpolation coefficients that depend on indices + coeff_int_cells_h[coeff_at(ic, j, ib)] = static_cast<TypeParam>(1.0 / 3.0 + j * 0.01); + } + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + p_cell_out_h[cell_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + } + } + } + + // Copy to device + Kokkos::deep_copy(this->p_edge_in, p_edge_in_h); + 
Kokkos::deep_copy(this->edge_idx, edge_idx_h); + Kokkos::deep_copy(this->edge_blk, edge_blk_h); + Kokkos::deep_copy(this->coeff_int_cells, coeff_int_cells_h); + Kokkos::deep_copy(this->p_cell_out, p_cell_out_h); + + // Call the function under test + edges2cells_scalar_lib<TypeParam>( + this->p_edge_in.data(), this->edge_idx.data(), + this->edge_blk.data(), this->coeff_int_cells.data(), + this->p_cell_out.data(), this->i_startblk, this->i_endblk, + this->i_startidx, this->i_endidx, this->slev, this->elev, + nproma, nlev, nblks_e, nblks_c, this->lacc); + + // Copy results back to host + Kokkos::deep_copy(p_cell_out_h, this->p_cell_out); + + // Expected results based on the specific test values + std::vector<TypeParam> expected_cells(12); + int idx = 0; + std::generate(expected_cells.begin(), expected_cells.end(), [&idx]() { + TypeParam values[] = { + 1.37677, 1.7201, 1.47977, 1.8231, 1.58277, 1.9261, + 1.3802, 1.72353, 1.4832, 1.82653, 1.5862, 1.92953 + }; + return values[idx++]; + }); + + // Verify results + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jc = this->i_startidx; jc <= this->i_endidx; ++jc) { + EXPECT_NEAR(p_cell_out_h[cell_at(jc, jk, jb)], + expected_cells[cell_at(jc, jk, jb)], + static_cast<TypeParam>(1e-5)) + << "Failure at block " << jb << ", level " << jk << ", index " << jc; + } + } + } +} + +TYPED_TEST(InterpolationScalarSingleParamTest, Edges2CellsRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_c = this->nblks_c; + + // Define indexing helpers + const auto &edge_at = at<nproma, nlev, nblks_e>; + const auto &idx_at = at<nproma, nblks_c, 3>; + const auto &blk_at = at<nproma, nblks_c, 3>; + const auto &coeff_at = at<nproma, 3, nblks_c>; + const auto &cell_at = at<nproma, nlev, nblks_c>; + + // Create host mirror views + auto p_edge_in_h = 
Kokkos::create_mirror_view(this->p_edge_in); + auto edge_idx_h = Kokkos::create_mirror_view(this->edge_idx); + auto edge_blk_h = Kokkos::create_mirror_view(this->edge_blk); + auto coeff_int_cells_h = Kokkos::create_mirror_view(this->coeff_int_cells); + auto p_cell_out_h = Kokkos::create_mirror_view(this->p_cell_out); + + // Use fixed seed for reproducibility + std::mt19937 gen(42); + std::uniform_int_distribution<int> edge_distrib(0, nproma - 1); + std::uniform_int_distribution<int> block_distrib(0, nblks_e - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.01, 1.0); + + // Initialize with random values + for (int ib = 0; ib < nblks_e; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + p_edge_in_h[edge_at(ic, ik, ib)] = real_distrib(gen); + } + } + } + + // Initialize cell connectivity indices with random values + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + // Each cell connects to 3 edges + for (int j = 0; j < 3; ++j) { + edge_idx_h[idx_at(ic, ib, j)] = edge_distrib(gen); + edge_blk_h[blk_at(ic, ib, j)] = block_distrib(gen); + + // Random interpolation coefficients + coeff_int_cells_h[coeff_at(ic, j, ib)] = real_distrib(gen) / 3.0; // Scaled to ensure reasonable sums + } + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + p_cell_out_h[cell_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + } + } + } + + // Copy to device + Kokkos::deep_copy(this->p_edge_in, p_edge_in_h); + Kokkos::deep_copy(this->edge_idx, edge_idx_h); + Kokkos::deep_copy(this->edge_blk, edge_blk_h); + Kokkos::deep_copy(this->coeff_int_cells, coeff_int_cells_h); + Kokkos::deep_copy(this->p_cell_out, p_cell_out_h); + + // Call the function under test edges2cells_scalar_lib<TypeParam>( - this->p_edge_in.data(), this->edge_idx.data(), this->edge_blk.data(), - this->coeff_int_cells.data(), this->p_cell_out.data(), this->i_startblk, - this->i_endblk, this->i_startidx, this->i_endidx, this->slev, 
this->elev, - this->nproma, this->nlev, this->nblks_e, this->nblks_c, this->lacc); - - // Check the outputs only for blocks in the range - // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } - for (int block = this->i_startblk; block <= this->i_endblk; ++block) { - for (int level = this->slev; level < this->elev; ++level) { - for (int i = this->i_startidx; i < this->i_endidx; ++i) { - // Compute the linear index for a 3D array in column-major order: - size_t idx = - i + level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 3 stencil points, - // expect 3. - EXPECT_NEAR(this->p_cell_out[idx], static_cast<TypeParam>(3), + this->p_edge_in.data(), this->edge_idx.data(), + this->edge_blk.data(), this->coeff_int_cells.data(), + this->p_cell_out.data(), this->i_startblk, this->i_endblk, + this->i_startidx, this->i_endidx, this->slev, this->elev, + nproma, nlev, nblks_e, nblks_c, this->lacc); + + // Copy results back to host + Kokkos::deep_copy(p_cell_out_h, this->p_cell_out); + + // Prepare expected results storage + using host_space = Kokkos::HostSpace; + Kokkos::View<TypeParam***, host_space> expected_cells("expected_cells", nproma, nlev, nblks_c); + + // Compute expected values + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jc = this->i_startidx; jc <= this->i_endidx; ++jc) { + expected_cells(jc, jk, jb) = 0.0; + + for (int j = 0; j < 3; ++j) { + int edge_index = edge_idx_h[idx_at(jc, jb, j)]; + int edge_block = edge_blk_h[blk_at(jc, jb, j)]; + TypeParam coeff = coeff_int_cells_h[coeff_at(jc, j, jb)]; + + expected_cells(jc, jk, jb) += coeff * p_edge_in_h[edge_at(edge_index, jk, edge_block)]; + } + } + } + } + + // Verify results + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jc = this->i_startidx; jc <= this->i_endidx; ++jc) { + 
EXPECT_NEAR(p_cell_out_h[cell_at(jc, jk, jb)], + expected_cells(jc, jk, jb), + static_cast<TypeParam>(1e-5)) + << "Failure at block " << jb << ", level " << jk << ", index " << jc; + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +// +// ! verts2cells +// +//////////////////////////////////////////////////////////////////////////////// + +TYPED_TEST(InterpolationScalarSingleParamTest, Verts2CellsSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_v = this->nblks_v; + constexpr int nblks_c = this->nblks_c; + constexpr int npromz_c = this->npromz_c; + + // Define indexing helpers + const auto &vert_at = at<nproma, nlev, nblks_v>; + const auto &idx_at = at<nproma, nblks_c, 3>; + const auto &blk_at = at<nproma, nblks_c, 3>; + const auto &coeff_at = at<nproma, 3, nblks_c>; + const auto &cell_at = at<nproma, nlev, nblks_c>; + + // Create host mirror views + auto p_vert_in_h = Kokkos::create_mirror_view(this->p_vert_in); + auto cell_index_idx_h = Kokkos::create_mirror_view(this->cell_index_idx); + auto cell_index_blk_h = Kokkos::create_mirror_view(this->cell_index_blk); + auto coeff_int_cells_h = Kokkos::create_mirror_view(this->coeff_int_cells); + auto p_cell_out_h = Kokkos::create_mirror_view(this->p_cell_out); + + // Initialize with index-based test values + for (int ib = 0; ib < nblks_v; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + // Value depends on all three indices + p_vert_in_h[vert_at(ic, ik, ib)] = static_cast<TypeParam>(1.0 + ic + ik * 0.1 + ib * 0.01); + } + } + } + + // Initialize cell connectivity indices with specific pattern + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + // Each cell connects to 3 vertices + for (int j = 0; j < 3; ++j) { + // Vertex indices with a pattern + cell_index_idx_h[idx_at(ic, ib, j)] = (ic + j) % nproma; + cell_index_blk_h[blk_at(ic, ib, j)] = (ib 
+ j % 2) % nblks_v; + + // Interpolation coefficients that depend on indices + coeff_int_cells_h[coeff_at(ic, j, ib)] = static_cast<TypeParam>(1.0 / 3.0 + j * 0.01); + } + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + p_cell_out_h[cell_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + } + } + } + + // Copy to device + Kokkos::deep_copy(this->p_vert_in, p_vert_in_h); + Kokkos::deep_copy(this->cell_index_idx, cell_index_idx_h); + Kokkos::deep_copy(this->cell_index_blk, cell_index_blk_h); + Kokkos::deep_copy(this->coeff_int_cells, coeff_int_cells_h); + Kokkos::deep_copy(this->p_cell_out, p_cell_out_h); + + // Call the function under test + verts2cells_scalar_lib<TypeParam>( + this->p_vert_in.data(), this->cell_index_idx.data(), + this->cell_index_blk.data(), this->coeff_int_cells.data(), + this->p_cell_out.data(), nblks_c, npromz_c, this->slev, this->elev, + nproma, nlev, nblks_v, this->lacc); + + // Copy results back to host + Kokkos::deep_copy(p_cell_out_h, this->p_cell_out); + + // Expected results based on the specific test values + std::vector<TypeParam> expected_cells(12); + int idx = 0; + std::generate(expected_cells.begin(), expected_cells.end(), [&idx]() { + TypeParam values[] = { + 1.37677, 1.7201, 1.47977, 1.8231, 1.58277, 1.9261, + 1.3802, 1.72353, 1.4832, 1.82653, 1.5862, 1.92953 + }; + return values[idx++]; + }); + + // Verify results - check the same ranges as in the expected calculation + for (int jb = 0; jb < nblks_c; ++jb) { + for (int jk = this->slev; jk <= this->elev; ++jk) { + int nlen = (jb != nblks_c - 1) ? nproma : npromz_c; + int start_idx = (jb >= this->i_startblk && jb <= this->i_endblk) ? this->i_startidx : 0; + int end_idx = (jb >= this->i_startblk && jb <= this->i_endblk) ? 
this->i_endidx : nlen - 1; + + for (int jc = start_idx; jc <= end_idx; ++jc) { + EXPECT_NEAR(p_cell_out_h[cell_at(jc, jk, jb)], + expected_cells[cell_at(jc, jk, jb)], static_cast<TypeParam>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " - << i; + << "Failure at block " << jb << ", level " << jk << ", index " << jc; } } } } -TYPED_TEST(InterpolationScalarTypedTestFixture, Verts2Cells) { +TYPED_TEST(InterpolationScalarSingleParamTest, Verts2CellsRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_v = this->nblks_v; + constexpr int nblks_c = this->nblks_c; + constexpr int npromz_c = this->npromz_c; + + // Define indexing helpers + const auto &vert_at = at<nproma, nlev, nblks_v>; + const auto &idx_at = at<nproma, nblks_c, 3>; + const auto &blk_at = at<nproma, nblks_c, 3>; + const auto &coeff_at = at<nproma, 3, nblks_c>; + const auto &cell_at = at<nproma, nlev, nblks_c>; + + // Create host mirror views + auto p_vert_in_h = Kokkos::create_mirror_view(this->p_vert_in); + auto cell_index_idx_h = Kokkos::create_mirror_view(this->cell_index_idx); + auto cell_index_blk_h = Kokkos::create_mirror_view(this->cell_index_blk); + auto coeff_int_cells_h = Kokkos::create_mirror_view(this->coeff_int_cells); + auto p_cell_out_h = Kokkos::create_mirror_view(this->p_cell_out); + + // Use fixed seed for reproducibility + std::mt19937 gen(42); + std::uniform_int_distribution<int> vert_distrib(0, nproma - 1); + std::uniform_int_distribution<int> block_distrib(0, nblks_v - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.01, 1.0); + + // Initialize with random values + for (int ib = 0; ib < nblks_v; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + p_vert_in_h[vert_at(ic, ik, ib)] = real_distrib(gen); + } + } + } + + // Initialize cell connectivity indices with random values + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + 
// Each cell connects to 3 vertices + for (int j = 0; j < 3; ++j) { + cell_index_idx_h[idx_at(ic, ib, j)] = vert_distrib(gen); + cell_index_blk_h[blk_at(ic, ib, j)] = block_distrib(gen); + + // Random interpolation coefficients + coeff_int_cells_h[coeff_at(ic, j, ib)] = real_distrib(gen) / 3.0; // Scaled to ensure reasonable sums + } + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + p_cell_out_h[cell_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + } + } + } + + // Copy to device + Kokkos::deep_copy(this->p_vert_in, p_vert_in_h); + Kokkos::deep_copy(this->cell_index_idx, cell_index_idx_h); + Kokkos::deep_copy(this->cell_index_blk, cell_index_blk_h); + Kokkos::deep_copy(this->coeff_int_cells, coeff_int_cells_h); + Kokkos::deep_copy(this->p_cell_out, p_cell_out_h); + // Call the function under test verts2cells_scalar_lib<TypeParam>( this->p_vert_in.data(), this->cell_index_idx.data(), this->cell_index_blk.data(), this->coeff_int_cells.data(), - this->p_cell_out.data(), this->nblks_c, this->npromz_c, this->slev, - this->elev, this->nproma, this->nlev, this->nblks_v, this->lacc); - - // Check the outputs only for blocks in the range - // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } - for (int block = this->i_startblk; block <= this->i_endblk; ++block) { - for (int level = this->slev; level < this->elev; ++level) { - for (int i = this->i_startidx; i < this->i_endidx; ++i) { - // Compute the linear index for a 3D array in column-major order: - size_t idx = - i + level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 3 stencil points, - // expect 3. 
- EXPECT_NEAR(this->p_cell_out[idx], static_cast<TypeParam>(3), + this->p_cell_out.data(), nblks_c, npromz_c, this->slev, this->elev, + nproma, nlev, nblks_v, this->lacc); + + // Copy results back to host + Kokkos::deep_copy(p_cell_out_h, this->p_cell_out); + + // Prepare expected results storage + using host_space = Kokkos::HostSpace; + Kokkos::View<TypeParam***, host_space> expected_cells("expected_cells", nproma, nlev, nblks_c); + + // Compute expected values + for (int jb = 0; jb < nblks_c; ++jb) { + for (int jk = this->slev; jk <= this->elev; ++jk) { + int nlen = (jb != nblks_c - 1) ? nproma : npromz_c; + for (int jc = 0; jc < nlen; ++jc) { + expected_cells(jc, jk, jb) = 0.0; + + for (int j = 0; j < 3; ++j) { + int vert_index = cell_index_idx_h[idx_at(jc, jb, j)]; + int vert_block = cell_index_blk_h[blk_at(jc, jb, j)]; + TypeParam coeff = coeff_int_cells_h[coeff_at(jc, j, jb)]; + + expected_cells(jc, jk, jb) += coeff * p_vert_in_h[vert_at(vert_index, jk, vert_block)]; + } + } + } + } + + // Verify results + for (int jb = 0; jb < nblks_c; ++jb) { + for (int jk = this->slev; jk <= this->elev; ++jk) { + int nlen = (jb != nblks_c - 1) ? 
nproma : npromz_c; + for (int jc = 0; jc < nlen; ++jc) { + EXPECT_NEAR(p_cell_out_h[cell_at(jc, jk, jb)], + expected_cells(jc, jk, jb), static_cast<TypeParam>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " - << i; + << "Failure at block " << jb << ", level " << jk << ", index " << jc; } } } @@ -289,48 +967,229 @@ TYPED_TEST(InterpolationScalarTypedTestFixture, Verts2Cells) { // //////////////////////////////////////////////////////////////////////////////// -TYPED_TEST(InterpolationScalarTypedTestFixture, AvgLib) { +TYPED_TEST(InterpolationScalarSingleParamTest, CellAvgLibSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + + // Define indexing helpers + const auto &psi_at = at<nproma, nlev, nblks_c>; + const auto &idx_at = at<nproma, nblks_c, 3>; + const auto &blk_at = at<nproma, nblks_c, 3>; + const auto &coeff_at = at<nproma, 4, nblks_c>; // 4 coefficients (self + 3 neighbors) + const auto &avg_at = at<nproma, nlev, nblks_c>; + + // Create host mirror views + auto psi_c_h = Kokkos::create_mirror_view(this->psi_c); + auto cell_neighbor_idx_h = Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto cell_neighbor_blk_h = Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto avg_coeff_h = Kokkos::create_mirror_view(this->avg_coeff); + auto avg_psi_c_h = Kokkos::create_mirror_view(this->avg_psi_c); + + // Initialize with index-based test values + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + // Value depends on all three indices + psi_c_h[psi_at(ic, ik, ib)] = static_cast<TypeParam>(1.0 + ic + ik * 0.1 + ib * 0.01); + } + } + } - // Call the function - cell_avg_lib<TypeParam>(this->psi_c.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), - this->avg_coeff.data(), this->avg_psi_c.data(), - this->i_startblk, this->i_endblk, this->i_startidx, - this->i_endidx, 
this->slev, this->elev, this->nproma, - this->nlev, this->nblks_c, this->lacc); - - // Check the outputs only for blocks in the range - // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } - for (int block = this->i_startblk; block <= this->i_endblk; ++block) { - for (int level = this->slev; level < this->elev; ++level) { - for (int i = this->i_startidx; i < this->i_endidx; ++i) { - // Compute the linear index for a 3D array in column-major order: - size_t idx = - i + level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 4 stencil points, - // expect 4. - EXPECT_NEAR(this->avg_psi_c[idx], static_cast<TypeParam>(4), + // Initialize cell neighbor indices with specific pattern + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + // Each cell has 3 neighboring cells + for (int j = 0; j < 3; ++j) { + // Neighbor indices with a pattern + cell_neighbor_idx_h[idx_at(ic, ib, j)] = (ic + j + 1) % nproma; + cell_neighbor_blk_h[blk_at(ic, ib, j)] = (ib + j % 2) % nblks_c; + } + + // Averaging coefficients - one for the cell itself and one for each neighbor + avg_coeff_h[coeff_at(ic, 0, ib)] = static_cast<TypeParam>(0.4); // Self weight + avg_coeff_h[coeff_at(ic, 1, ib)] = static_cast<TypeParam>(0.2); // First neighbor + avg_coeff_h[coeff_at(ic, 2, ib)] = static_cast<TypeParam>(0.2); // Second neighbor + avg_coeff_h[coeff_at(ic, 3, ib)] = static_cast<TypeParam>(0.2); // Third neighbor + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + avg_psi_c_h[avg_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + } + } + } + + // Copy to device + Kokkos::deep_copy(this->psi_c, psi_c_h); + Kokkos::deep_copy(this->cell_neighbor_idx, cell_neighbor_idx_h); + Kokkos::deep_copy(this->cell_neighbor_blk, cell_neighbor_blk_h); + Kokkos::deep_copy(this->avg_coeff, avg_coeff_h); + Kokkos::deep_copy(this->avg_psi_c, avg_psi_c_h); + + // Call the function under test + 
cell_avg_lib<TypeParam>( + this->psi_c.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->avg_coeff.data(), + this->avg_psi_c.data(), this->i_startblk, this->i_endblk, + this->i_startidx, this->i_endidx, this->slev, this->elev, + nproma, nlev, nblks_c, this->lacc); + + // Copy results back to host + Kokkos::deep_copy(avg_psi_c_h, this->avg_psi_c); + + // Expected results based on the specific test values + std::vector<TypeParam> expected_avg(12); + int idx = 0; + std::generate(expected_avg.begin(), expected_avg.end(), [&idx]() { + TypeParam values[] = { + 1.402, 1.602, 1.502, 1.702, 1.602, 1.802, + 1.408, 1.608, 1.508, 1.708, 1.608, 1.808 + }; + return values[idx++]; + }); + + // Verify results + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jc = this->i_startidx; jc <= this->i_endidx; ++jc) { + EXPECT_NEAR(avg_psi_c_h[avg_at(jc, jk, jb)], + expected_avg[avg_at(jc, jk, jb)], static_cast<TypeParam>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " - << i; + << "Failure at block " << jb << ", level " << jk << ", index " << jc; + } + } + } +} + +TYPED_TEST(InterpolationScalarSingleParamTest, CellAvgLibRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + + // Define indexing helpers + const auto &psi_at = at<nproma, nlev, nblks_c>; + const auto &idx_at = at<nproma, nblks_c, 3>; + const auto &blk_at = at<nproma, nblks_c, 3>; + const auto &coeff_at = at<nproma, 4, nblks_c>; // 4 coefficients (self + 3 neighbors) + const auto &avg_at = at<nproma, nlev, nblks_c>; + + // Create host mirror views + auto psi_c_h = Kokkos::create_mirror_view(this->psi_c); + auto cell_neighbor_idx_h = Kokkos::create_mirror_view(this->cell_neighbor_idx); + auto cell_neighbor_blk_h = Kokkos::create_mirror_view(this->cell_neighbor_blk); + auto avg_coeff_h = 
Kokkos::create_mirror_view(this->avg_coeff); + auto avg_psi_c_h = Kokkos::create_mirror_view(this->avg_psi_c); + + // Use fixed seed for reproducibility + std::mt19937 gen(42); + std::uniform_int_distribution<int> cell_distrib(0, nproma - 1); + std::uniform_int_distribution<int> block_distrib(0, nblks_c - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.01, 1.0); + std::uniform_real_distribution<TypeParam> coeff_distrib(0.01, 0.5); // Keep coefficients reasonable + + // Initialize with random values + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + psi_c_h[psi_at(ic, ik, ib)] = real_distrib(gen); + } + } + } + + // Initialize cell neighbor indices with random values + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + // Each cell has 3 neighboring cells + for (int j = 0; j < 3; ++j) { + cell_neighbor_idx_h[idx_at(ic, ib, j)] = cell_distrib(gen); + cell_neighbor_blk_h[blk_at(ic, ib, j)] = block_distrib(gen); + } + + avg_coeff_h[coeff_at(ic, 0, ib)] = static_cast<TypeParam>(coeff_distrib(gen)); + avg_coeff_h[coeff_at(ic, 1, ib)] = static_cast<TypeParam>(coeff_distrib(gen)); + avg_coeff_h[coeff_at(ic, 2, ib)] = static_cast<TypeParam>(coeff_distrib(gen)); + avg_coeff_h[coeff_at(ic, 3, ib)] = static_cast<TypeParam>(coeff_distrib(gen)); + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + avg_psi_c_h[avg_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + } + } + } + + // Copy to device + Kokkos::deep_copy(this->psi_c, psi_c_h); + Kokkos::deep_copy(this->cell_neighbor_idx, cell_neighbor_idx_h); + Kokkos::deep_copy(this->cell_neighbor_blk, cell_neighbor_blk_h); + Kokkos::deep_copy(this->avg_coeff, avg_coeff_h); + Kokkos::deep_copy(this->avg_psi_c, avg_psi_c_h); + + // Call the function under test + cell_avg_lib<TypeParam>( + this->psi_c.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->avg_coeff.data(), + 
this->avg_psi_c.data(), this->i_startblk, this->i_endblk, + this->i_startidx, this->i_endidx, this->slev, this->elev, + nproma, nlev, nblks_c, this->lacc); + + // Copy results back to host + Kokkos::deep_copy(avg_psi_c_h, this->avg_psi_c); + + // Prepare expected results storage + using host_space = Kokkos::HostSpace; + Kokkos::View<TypeParam***, host_space> expected_avg("expected_avg", nproma, nlev, nblks_c); + + // Compute expected values + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jc = this->i_startidx; jc <= this->i_endidx; ++jc) { + // Self contribution + expected_avg(jc, jk, jb) = + psi_c_h[psi_at(jc, jk, jb)] * avg_coeff_h[coeff_at(jc, 0, jb)]; + + // Neighbor contributions + for (int j = 0; j < 3; ++j) { + int neighbor_idx = cell_neighbor_idx_h[idx_at(jc, jb, j)]; + int neighbor_blk = cell_neighbor_blk_h[blk_at(jc, jb, j)]; + TypeParam coeff = avg_coeff_h[coeff_at(jc, j+1, jb)]; + + expected_avg(jc, jk, jb) += + psi_c_h[psi_at(neighbor_idx, jk, neighbor_blk)] * coeff; + } + } + } + } + + // Verify results + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jc = this->i_startidx; jc <= this->i_endidx; ++jc) { + EXPECT_NEAR(avg_psi_c_h[avg_at(jc, jk, jb)], + expected_avg(jc, jk, jb), + static_cast<TypeParam>(1e-5)) + << "Failure at block " << jb << ", level " << jk << ", index " << jc; } } } } template <typename TypePair> -class InterpolationScalarMixedTestFixture : public ::testing::Test, +class InterpolationScalarDoubleParamTest : public ::testing::Test, public interp_dimensions { -public: + protected: using InType = typename TypePair::in_type; using OutType = typename TypePair::out_type; + // Using Kokkos execution and memory spaces + using exec_space = Kokkos::DefaultExecutionSpace; + using memory_space = exec_space::memory_space; + // Arrays used for cells2edges - std::vector<InType> p_cell_in; 
// Dimensions: (nproma, nlev, nblks_c) - std::vector<int> edge_cell_idx; // Dimensions: (nproma, nblks_e, 2) - std::vector<int> edge_cell_blk; // Dimensions: (nproma, nblks_e, 2) - std::vector<OutType> coeff_int_edges; // Dimensions: (nproma, 2, nblks_e) - std::vector<OutType> p_edge_out; // Dimensions: (nproma, nlev, nblks_e) + Kokkos::View<InType*, memory_space> p_cell_in; + Kokkos::View<int*, memory_space> edge_cell_idx; + Kokkos::View<int*, memory_space> edge_cell_blk; + Kokkos::View<OutType*, memory_space> coeff_int_edges; + Kokkos::View<OutType*, memory_space> p_edge_out; // Further parameters for cells2edges const int patch_id = 0; @@ -342,41 +1201,31 @@ public: std::vector<int> i_endidx_in; // Dimensions: (2) // Arrays used for cells2verts - std::vector<int> vert_cell_idx; // Dimensions: (nproma, nblks_v, 6) - std::vector<int> vert_cell_blk; // Dimensions: (nproma, nblks_v, 6) - std::vector<OutType> coeff_int_verts; // Dimensions: (nproma, 6, nblks_v) - std::vector<OutType> p_vert_out; // Dimensions: (nproma, nlev, nblks_v) - - InterpolationScalarMixedTestFixture() { - // Allocate and initialize arrays needed for cells2edges - p_cell_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_c), - static_cast<InType>(1)); - edge_cell_idx.resize(num_elements_3d<int>(nproma, nblks_e, 2), 1); - edge_cell_blk.resize(num_elements_3d<int>(nproma, nblks_e, 2), 0); - coeff_int_edges.resize(num_elements_3d<InType>(nproma, 2, nblks_e), - static_cast<OutType>(1)); - - p_edge_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_e), - static_cast<OutType>(0)); - + Kokkos::View<int*, memory_space> vert_cell_idx; + Kokkos::View<int*, memory_space> vert_cell_blk; + Kokkos::View<OutType*, memory_space> coeff_int_verts; + Kokkos::View<OutType*, memory_space> p_vert_out; + + InterpolationScalarDoubleParamTest() + : p_cell_in("p_cell_in", nproma * nlev * nblks_c), + edge_cell_idx("edge_cell_idx", nproma * nblks_e * 2), + edge_cell_blk("edge_cell_blk", nproma * nblks_e * 2), + 
coeff_int_edges("coeff_int_edges", nproma * 2 * nblks_e), + p_edge_out("p_edge_out", nproma * nlev * nblks_e), + vert_cell_idx("vert_cell_idx", nproma * nblks_v * 6), + vert_cell_blk("vert_cell_blk", nproma * nblks_v * 6), + coeff_int_verts("coeff_int_verts", nproma * 6 * nblks_v), + p_vert_out("p_vert_out", nproma * nlev * nblks_v) + { // Allocate neighbour indexes for cells2edges i_startblk_in.resize(2, i_startblk); i_endblk_in.resize(2, i_endblk); i_startidx_in.resize(2, i_startidx); i_endidx_in.resize(2, i_endidx); - - // Allocate & Initialize arrays needed for cells2verts - vert_cell_idx.resize(num_elements_3d<int>(nproma, nblks_v, 6), 1); - vert_cell_blk.resize(num_elements_3d<int>(nproma, nblks_v, 6), 0); - coeff_int_verts.resize(num_elements_3d<InType>(nproma, 6, nblks_v), - static_cast<OutType>(1)); - - p_vert_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), - static_cast<OutType>(0)); } }; -TYPED_TEST_SUITE(InterpolationScalarMixedTestFixture, MixedTypesSP2DP); +TYPED_TEST_SUITE(InterpolationScalarDoubleParamTest, MixedTypesSP2DP); //////////////////////////////////////////////////////////////////////////////// // @@ -384,34 +1233,237 @@ TYPED_TEST_SUITE(InterpolationScalarMixedTestFixture, MixedTypesSP2DP); // //////////////////////////////////////////////////////////////////////////////// -TYPED_TEST(InterpolationScalarMixedTestFixture, Cells2Edges) { +TYPED_TEST(InterpolationScalarDoubleParamTest, Cells2EdgesSpecific) { using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; + + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + // Define indexing helpers + const auto &cell_at = at<nproma, nlev, nblks_c>; + const auto &edge_idx_at = at<nproma, nblks_e, 2>; + const auto &edge_blk_at = at<nproma, nblks_e, 2>; + const auto &coeff_at = at<nproma, 2, nblks_e>; + const auto &edge_at = at<nproma, nlev, 
nblks_e>; + + // Create host mirror views + auto p_cell_in_h = Kokkos::create_mirror_view(this->p_cell_in); + auto edge_cell_idx_h = Kokkos::create_mirror_view(this->edge_cell_idx); + auto edge_cell_blk_h = Kokkos::create_mirror_view(this->edge_cell_blk); + auto coeff_int_edges_h = Kokkos::create_mirror_view(this->coeff_int_edges); + auto p_edge_out_h = Kokkos::create_mirror_view(this->p_edge_out); + + // Initialize with index-based test values + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + // Value depends on all three indices + p_cell_in_h[cell_at(ic, ik, ib)] = static_cast<InType>(1.0 + ic + ik * 0.1 + ib * 0.01); + } + } + } - // Call the function + // Initialize edge connectivity indices with specific pattern + for (int ib = 0; ib < nblks_e; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + // Each edge connects to 2 cells + edge_cell_idx_h[edge_idx_at(ic, ib, 0)] = ic % nproma; // First cell index + edge_cell_idx_h[edge_idx_at(ic, ib, 1)] = (ic + 1) % nproma; // Second cell index + + edge_cell_blk_h[edge_blk_at(ic, ib, 0)] = ib % nblks_c; // First cell block + edge_cell_blk_h[edge_blk_at(ic, ib, 1)] = (ib + 1) % nblks_c; // Second cell block + + // Interpolation coefficients that depend on indices + coeff_int_edges_h[coeff_at(ic, 0, ib)] = static_cast<OutType>(0.5 + ic * 0.01); + coeff_int_edges_h[coeff_at(ic, 1, ib)] = static_cast<OutType>(0.5 - ic * 0.01); + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + p_edge_out_h[edge_at(ic, ik, ib)] = static_cast<OutType>(0.0); + } + } + } + + // Copy to device + Kokkos::deep_copy(this->p_cell_in, p_cell_in_h); + Kokkos::deep_copy(this->edge_cell_idx, edge_cell_idx_h); + Kokkos::deep_copy(this->edge_cell_blk, edge_cell_blk_h); + Kokkos::deep_copy(this->coeff_int_edges, coeff_int_edges_h); + Kokkos::deep_copy(this->p_edge_out, p_edge_out_h); + + // Call the function under test + cells2edges_scalar_lib<InType, OutType>( + 
this->p_cell_in.data(), this->edge_cell_idx.data(), + this->edge_cell_blk.data(), this->coeff_int_edges.data(), + this->p_edge_out.data(), this->i_startblk_in.data(), + this->i_endblk_in.data(), this->i_startidx_in.data(), + this->i_endidx_in.data(), this->slev, this->elev, nproma, + nlev, nblks_c, nblks_e, this->patch_id, + this->l_limited_area, this->lfill_latbc, this->lacc); + + // Copy results back to host + Kokkos::deep_copy(p_edge_out_h, this->p_edge_out); + + int i_startblk = this->i_startblk_in[1]; + int i_endblk = this->i_endblk_in[1]; + int i_startidx_range = this->i_startidx_in[1]; + int i_endidx_range = this->i_endidx_in[1]; + + // Expected results based on the specific test values + std::vector<OutType> expected_edges(12); + int idx = 0; + std::generate(expected_edges.begin(), expected_edges.end(), [&idx]() { + OutType values[] = { + 1.505, 1.5149, 1.605, 1.6149, 1.705, 1.7149, + 1.505, 1.5151, 1.605, 1.6151, 1.705, 1.7151 + }; + return values[idx++]; + }); + + // Verify results + for (int jb = i_startblk; jb <= i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_e_lib(i_startidx_range, i_endidx_range, nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int je = i_startidx; je <= i_endidx; ++je) { + EXPECT_NEAR(p_edge_out_h[edge_at(je, jk, jb)], + expected_edges[edge_at(je, jk, jb)], + static_cast<OutType>(1e-5)) + << "Failure at block " << jb << ", level " << jk << ", index " << je; + } + } + } +} + +TYPED_TEST(InterpolationScalarDoubleParamTest, Cells2EdgesRandom) { + using InType = typename TestFixture::InType; + using OutType = typename TestFixture::OutType; + + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + // Define indexing helpers + const auto &cell_at = at<nproma, nlev, nblks_c>; + const auto &edge_idx_at = at<nproma, nblks_e, 2>; + const auto &edge_blk_at = 
at<nproma, nblks_e, 2>; + const auto &coeff_at = at<nproma, 2, nblks_e>; + const auto &edge_at = at<nproma, nlev, nblks_e>; + + // Create host mirror views + auto p_cell_in_h = Kokkos::create_mirror_view(this->p_cell_in); + auto edge_cell_idx_h = Kokkos::create_mirror_view(this->edge_cell_idx); + auto edge_cell_blk_h = Kokkos::create_mirror_view(this->edge_cell_blk); + auto coeff_int_edges_h = Kokkos::create_mirror_view(this->coeff_int_edges); + auto p_edge_out_h = Kokkos::create_mirror_view(this->p_edge_out); + + // Use fixed seed for reproducibility + std::mt19937 gen(42); + std::uniform_int_distribution<int> cell_distrib(0, nproma - 1); + std::uniform_int_distribution<int> block_distrib(0, nblks_c - 1); + std::uniform_real_distribution<double> real_distrib(0.01, 1.0); + + // Initialize with random values + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + p_cell_in_h[cell_at(ic, ik, ib)] = static_cast<InType>(real_distrib(gen)); + } + } + } + + // Initialize edge connectivity indices with random values + for (int ib = 0; ib < nblks_e; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + // Each edge connects to 2 cells + edge_cell_idx_h[edge_idx_at(ic, ib, 0)] = cell_distrib(gen); + edge_cell_idx_h[edge_idx_at(ic, ib, 1)] = cell_distrib(gen); + + edge_cell_blk_h[edge_blk_at(ic, ib, 0)] = block_distrib(gen); + edge_cell_blk_h[edge_blk_at(ic, ib, 1)] = block_distrib(gen); + + coeff_int_edges_h[coeff_at(ic, 0, ib)] = static_cast<OutType>(real_distrib(gen)); + coeff_int_edges_h[coeff_at(ic, 1, ib)] = static_cast<OutType>(real_distrib(gen)); + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + p_edge_out_h[edge_at(ic, ik, ib)] = static_cast<OutType>(0.0); + } + } + } + + // Copy to device + Kokkos::deep_copy(this->p_cell_in, p_cell_in_h); + Kokkos::deep_copy(this->edge_cell_idx, edge_cell_idx_h); + Kokkos::deep_copy(this->edge_cell_blk, edge_cell_blk_h); + 
Kokkos::deep_copy(this->coeff_int_edges, coeff_int_edges_h); + Kokkos::deep_copy(this->p_edge_out, p_edge_out_h); + + // Call the function under test cells2edges_scalar_lib<InType, OutType>( this->p_cell_in.data(), this->edge_cell_idx.data(), this->edge_cell_blk.data(), this->coeff_int_edges.data(), this->p_edge_out.data(), this->i_startblk_in.data(), this->i_endblk_in.data(), this->i_startidx_in.data(), - this->i_endidx_in.data(), this->slev, this->elev, this->nproma, - this->nlev, this->nblks_c, this->nblks_e, this->patch_id, + this->i_endidx_in.data(), this->slev, this->elev, nproma, + nlev, nblks_c, nblks_e, this->patch_id, this->l_limited_area, this->lfill_latbc, this->lacc); - // Check the outputs only for blocks in the range - // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } - for (int block = this->i_startblk; block <= this->i_endblk; ++block) { - for (int level = this->slev; level < this->elev; ++level) { - for (int i = this->i_startidx; i < this->i_endidx; ++i) { - // Compute the linear index for a 3D array in column-major order: - size_t idx = - i + level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 2 stencil points, - // expect 2. 
- EXPECT_NEAR(this->p_edge_out[idx], static_cast<OutType>(2), + // Copy results back to host + Kokkos::deep_copy(p_edge_out_h, this->p_edge_out); + + // Prepare expected results storage + using host_space = Kokkos::HostSpace; + Kokkos::View<OutType***, host_space> expected_edges("expected_edges", nproma, nlev, nblks_e); + + // Since we're not testing the lateral boundary condition filling + // (this->l_limited_area == false && this->lfill_latbc == false), + // we only need to check the blocks in i_startblk_in[1] to i_endblk_in[1] + int i_startblk = this->i_startblk_in[1]; + int i_endblk = this->i_endblk_in[1]; + int i_startidx_range = this->i_startidx_in[1]; + int i_endidx_range = this->i_endidx_in[1]; + + // Compute expected values + for (int jb = i_startblk; jb <= i_endblk; ++jb) { + // Get the actual indices to process for this block + int i_startidx, i_endidx; + get_indices_e_lib(i_startidx_range, i_endidx_range, nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int je = i_startidx; je <= i_endidx; ++je) { + expected_edges(je, jk, jb) = + static_cast<OutType>(coeff_int_edges_h[coeff_at(je, 0, jb)] * + p_cell_in_h[cell_at(edge_cell_idx_h[edge_idx_at(je, jb, 0)], + jk, + edge_cell_blk_h[edge_blk_at(je, jb, 0)])]) + + static_cast<OutType>(coeff_int_edges_h[coeff_at(je, 1, jb)] * + p_cell_in_h[cell_at(edge_cell_idx_h[edge_idx_at(je, jb, 1)], + jk, + edge_cell_blk_h[edge_blk_at(je, jb, 1)])]); + } + } + } + + // Verify results + for (int jb = i_startblk; jb <= i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_e_lib(i_startidx_range, i_endidx_range, nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int je = i_startidx; je <= i_endidx; ++je) { + EXPECT_NEAR(p_edge_out_h[edge_at(je, jk, jb)], + expected_edges(je, jk, jb), static_cast<OutType>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index 
" - << i; + << "Failure at block " << jb << ", level " << jk << ", index " << je; } } } @@ -423,31 +1475,220 @@ TYPED_TEST(InterpolationScalarMixedTestFixture, Cells2Edges) { // //////////////////////////////////////////////////////////////////////////////// -TYPED_TEST(InterpolationScalarMixedTestFixture, Cells2Verts) { +TYPED_TEST(InterpolationScalarDoubleParamTest, Cells2VertsSpecific) { using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; + + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_v = this->nblks_v; + + // Define indexing helpers + const auto &cell_at = at<nproma, nlev, nblks_c>; + const auto &idx_at = at<nproma, nblks_v, 6>; + const auto &blk_at = at<nproma, nblks_v, 6>; + const auto &coeff_at = at<nproma, 6, nblks_v>; + const auto &vert_at = at<nproma, nlev, nblks_v>; + + // Create host mirror views + auto p_cell_in_h = Kokkos::create_mirror_view(this->p_cell_in); + auto vert_cell_idx_h = Kokkos::create_mirror_view(this->vert_cell_idx); + auto vert_cell_blk_h = Kokkos::create_mirror_view(this->vert_cell_blk); + auto coeff_int_verts_h = Kokkos::create_mirror_view(this->coeff_int_verts); + auto p_vert_out_h = Kokkos::create_mirror_view(this->p_vert_out); + + // Initialize with index-based test values + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + // Value depends on all three indices + p_cell_in_h[cell_at(ic, ik, ib)] = static_cast<InType>(1.0 + ic + ik * 0.1 + ib * 0.01); + } + } + } + // Initialize vertex connectivity indices with specific pattern + for (int ib = 0; ib < nblks_v; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + // Each vertex is connected to 6 cells + for (int j = 0; j < 6; ++j) { + // Cell indices with a pattern + vert_cell_idx_h[idx_at(ic, ib, j)] = (ic + j) % nproma; + vert_cell_blk_h[blk_at(ic, ib, j)] = (ib + j % nblks_c) % 
nblks_c; + + // Interpolation coefficients that depend on indices + coeff_int_verts_h[coeff_at(ic, j, ib)] = static_cast<OutType>(1.0 / 6.0 + j * 0.01); + } + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + p_vert_out_h[vert_at(ic, ik, ib)] = static_cast<OutType>(0.0); + } + } + } + + // Copy to device + Kokkos::deep_copy(this->p_cell_in, p_cell_in_h); + Kokkos::deep_copy(this->vert_cell_idx, vert_cell_idx_h); + Kokkos::deep_copy(this->vert_cell_blk, vert_cell_blk_h); + Kokkos::deep_copy(this->coeff_int_verts, coeff_int_verts_h); + Kokkos::deep_copy(this->p_vert_out, p_vert_out_h); + + // Call the function under test cells2verts_scalar_lib<InType, OutType>( this->p_cell_in.data(), this->vert_cell_idx.data(), this->vert_cell_blk.data(), this->coeff_int_verts.data(), this->p_vert_out.data(), this->i_startblk, this->i_endblk, - this->i_startidx, this->i_endidx, this->slev, this->elev, this->nproma, - this->nlev, this->nblks_c, this->nblks_v, this->lacc, this->acc_async); + this->i_startidx, this->i_endidx, this->slev, this->elev, + nproma, nlev, nblks_c, nblks_v, this->lacc, this->acc_async); + + // Copy results back to host + Kokkos::deep_copy(p_vert_out_h, this->p_vert_out); + + // Expected results based on the specific test values + std::vector<OutType> expected_verts(12); + int idx = 0; + std::generate(expected_verts.begin(), expected_verts.end(), [&idx]() { + OutType values[] = { + 1.7459, 1.7159, 1.8609, 1.8309, 1.9759, 1.9459, + 1.7456, 1.7156, 1.8606, 1.8306, 1.9756, 1.9456 + }; + return values[idx++]; + }); + + // Verify results + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(this->i_startidx, this->i_endidx, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jv = i_startidx; jv <= i_endidx; ++jv) { + EXPECT_NEAR(p_vert_out_h[vert_at(jv, jk, jb)], + expected_verts[vert_at(jv, jk, jb)], + 
static_cast<OutType>(1e-5)) + << "Failure at block " << jb << ", level " << jk << ", index " << jv; + } + } + } +} - // Check the outputs only for blocks in the range - // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } - for (int block = this->i_startblk; block <= this->i_endblk; ++block) { - for (int level = this->slev; level < this->elev; ++level) { - for (int i = this->i_startidx; i < this->i_endidx; ++i) { - // Compute the linear index for a 3D array in column-major order: - size_t idx = - i + level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 6 stencil points, - // expect 6. - EXPECT_NEAR(this->p_vert_out[idx], static_cast<OutType>(6), - static_cast<OutType>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " - << i; +TYPED_TEST(InterpolationScalarDoubleParamTest, Cells2VertsRandom) { + using InType = typename TestFixture::InType; + using OutType = typename TestFixture::OutType; + + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_v = this->nblks_v; + + // Define indexing helpers + const auto &cell_at = at<nproma, nlev, nblks_c>; + const auto &idx_at = at<nproma, nblks_v, 6>; + const auto &blk_at = at<nproma, nblks_v, 6>; + const auto &coeff_at = at<nproma, 6, nblks_v>; + const auto &vert_at = at<nproma, nlev, nblks_v>; + + // Create host mirror views + auto p_cell_in_h = Kokkos::create_mirror_view(this->p_cell_in); + auto vert_cell_idx_h = Kokkos::create_mirror_view(this->vert_cell_idx); + auto vert_cell_blk_h = Kokkos::create_mirror_view(this->vert_cell_blk); + auto coeff_int_verts_h = Kokkos::create_mirror_view(this->coeff_int_verts); + auto p_vert_out_h = Kokkos::create_mirror_view(this->p_vert_out); + + // Use fixed seed for reproducibility + std::mt19937 gen(42); + std::uniform_int_distribution<int> cell_distrib(0, nproma - 1); + std::uniform_int_distribution<int> 
block_distrib(0, nblks_c - 1); + std::uniform_real_distribution<double> real_distrib(0.01, 1.0); + std::uniform_real_distribution<double> coeff_distrib(0.01, 0.3); // Keep coefficients reasonable + + // Initialize with random values + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + p_cell_in_h[cell_at(ic, ik, ib)] = static_cast<InType>(real_distrib(gen)); + } + } + } + + // Initialize vertex connectivity indices with random values + for (int ib = 0; ib < nblks_v; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + // Each vertex is connected to 6 cells + for (int j = 0; j < 6; ++j) { + vert_cell_idx_h[idx_at(ic, ib, j)] = cell_distrib(gen); + vert_cell_blk_h[blk_at(ic, ib, j)] = block_distrib(gen); + + // Normalized coefficients + coeff_int_verts_h[coeff_at(ic, j, ib)] = static_cast<OutType>(coeff_distrib(gen)); + } + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + p_vert_out_h[vert_at(ic, ik, ib)] = static_cast<OutType>(0.0); + } + } + } + + // Copy to device + Kokkos::deep_copy(this->p_cell_in, p_cell_in_h); + Kokkos::deep_copy(this->vert_cell_idx, vert_cell_idx_h); + Kokkos::deep_copy(this->vert_cell_blk, vert_cell_blk_h); + Kokkos::deep_copy(this->coeff_int_verts, coeff_int_verts_h); + Kokkos::deep_copy(this->p_vert_out, p_vert_out_h); + + // Call the function under test + cells2verts_scalar_lib<InType, OutType>( + this->p_cell_in.data(), this->vert_cell_idx.data(), + this->vert_cell_blk.data(), this->coeff_int_verts.data(), + this->p_vert_out.data(), this->i_startblk, this->i_endblk, + this->i_startidx, this->i_endidx, this->slev, this->elev, + nproma, nlev, nblks_c, nblks_v, this->lacc, this->acc_async); + + // Copy results back to host + Kokkos::deep_copy(p_vert_out_h, this->p_vert_out); + + // Prepare expected results storage + using host_space = Kokkos::HostSpace; + Kokkos::View<OutType***, host_space> expected_verts("expected_verts", nproma, nlev, nblks_v); + + // 
Compute expected values + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + // Get the actual indices to process for this block + int i_startidx, i_endidx; + get_indices_v_lib(this->i_startidx, this->i_endidx, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jv = i_startidx; jv <= i_endidx; ++jv) { + expected_verts(jv, jk, jb) = static_cast<OutType>(0.0); + + for (int j = 0; j < 6; ++j) { + int cell_idx = vert_cell_idx_h[idx_at(jv, jb, j)]; + int cell_blk = vert_cell_blk_h[blk_at(jv, jb, j)]; + OutType coeff = coeff_int_verts_h[coeff_at(jv, j, jb)]; + + expected_verts(jv, jk, jb) += + static_cast<OutType>(coeff * p_cell_in_h[cell_at(cell_idx, jk, cell_blk)]); + } + } + } + } + + // Verify results + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(this->i_startidx, this->i_endidx, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jv = i_startidx; jv <= i_endidx; ++jv) { + EXPECT_NEAR(p_vert_out_h[vert_at(jv, jk, jb)], + expected_verts(jv, jk, jb), + static_cast<OutType>(1e-5)) + << "Failure at block " << jb << ", level " << jk << ", index " << jv; } } } @@ -460,7 +1701,7 @@ TYPED_TEST(InterpolationScalarMixedTestFixture, Cells2Verts) { //////////////////////////////////////////////////////////////////////////////// // The test for cells2verts_ri is similar to cells2verts, but is done here -// separtely to avoid as a differebt template instantiation is needed for the +// separtely to avoid as a different template instantiation is needed for the // function call template <typename Types> class Cells2vertsriScalarLibTestFixture : public testing::Test, @@ -469,36 +1710,102 @@ public: using InType = typename Types::in_type; using OutType = typename Types::out_type; + // Using Kokkos execution and memory spaces + using 
exec_space = Kokkos::DefaultExecutionSpace; + using memory_space = exec_space::memory_space; + // Arrays stored in std::vector. - std::vector<InType> p_cell_in; // Dimensions: (nproma, nlev, nblks_c) - std::vector<int> vert_cell_idx; // Dimensions: (nproma, nblks_v, 6) - std::vector<int> vert_cell_blk; // Dimensions: (nproma, nblks_v, 6) - std::vector<InType> coeff_int; // Dimensions: (nproma, 6, nblks_v) - std::vector<OutType> p_vert_out; // Dimensions: (nproma, nlev, nblks_v) - - Cells2vertsriScalarLibTestFixture() { - // Allocate and initialize inputs. - p_cell_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_c), - static_cast<InType>(1)); - vert_cell_idx.resize(num_elements_3d<int>(nproma, nblks_v, 6), 1); - vert_cell_blk.resize(num_elements_3d<int>(nproma, nblks_v, 6), 0); - coeff_int.resize(num_elements_3d<InType>(nproma, 6, nblks_v), - static_cast<InType>(1)); - - // Allocate output arrays and initialize to zero. - p_vert_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), - static_cast<OutType>(0)); - } + Kokkos::View<InType*, memory_space> p_cell_in; + Kokkos::View<int*, memory_space> vert_cell_idx; + Kokkos::View<int*, memory_space> vert_cell_blk; + Kokkos::View<InType*, memory_space> coeff_int; + Kokkos::View<OutType*, memory_space> p_vert_out; + + Cells2vertsriScalarLibTestFixture() + : p_cell_in("p_cell_in", nproma * nlev * nblks_c), + vert_cell_idx("vert_cell_idx", nproma * nblks_v * 6), + vert_cell_blk("vert_cell_blk", nproma * nblks_v * 6), + coeff_int("coeff_int", nproma * 6 * nblks_v), + p_vert_out("p_vert_out", nproma * nlev * nblks_v) + {} }; // Add test suite TYPED_TEST_SUITE(Cells2vertsriScalarLibTestFixture, MixedTypes); // Add test -TYPED_TEST(Cells2vertsriScalarLibTestFixture, Cells2VertsRI) { +TYPED_TEST(Cells2vertsriScalarLibTestFixture, Cells2VertsRISpecific) { using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; 
+ constexpr int nblks_c = this->nblks_c; + constexpr int nblks_v = this->nblks_v; + + // Define indexing helpers + const auto &cell_at = at<nproma, nlev, nblks_c>; + const auto &idx_at = at<nproma, nblks_v, 6>; + const auto &blk_at = at<nproma, nblks_v, 6>; + const auto &coeff_at = at<nproma, 6, nblks_v>; + + // For output, we need to handle different layouts depending on __LOOP_EXCHANGE + // This is a special case for this function +#ifdef __LOOP_EXCHANGE + const auto &vert_at = at<nproma, nlev, nblks_c>; // jv, jk, jb order +#else + const auto &vert_at = at<nlev, nproma, nblks_c>; // jk, jv, jb order +#endif + + // Create host mirror views + auto p_cell_in_h = Kokkos::create_mirror_view(this->p_cell_in); + auto vert_cell_idx_h = Kokkos::create_mirror_view(this->vert_cell_idx); + auto vert_cell_blk_h = Kokkos::create_mirror_view(this->vert_cell_blk); + auto coeff_int_h = Kokkos::create_mirror_view(this->coeff_int); + auto p_vert_out_h = Kokkos::create_mirror_view(this->p_vert_out); + + // Initialize with index-based test values + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + // Value depends on all three indices + p_cell_in_h[cell_at(ic, ik, ib)] = static_cast<InType>(1.0 + ic + ik * 0.1 + ib * 0.01); + } + } + } + + // Initialize vertex connectivity indices with specific pattern + for (int ib = 0; ib < nblks_v; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + // Each vertex is connected to 6 cells + for (int j = 0; j < 6; ++j) { + // Cell indices with a pattern + vert_cell_idx_h[idx_at(ic, ib, j)] = (ic + j) % nproma; + vert_cell_blk_h[blk_at(ic, ib, j)] = (ib + j % nblks_c) % nblks_c; + + // Interpolation coefficients that depend on indices + coeff_int_h[coeff_at(ic, j, ib)] = static_cast<OutType>(1.0 / 6.0 + j * 0.01); + } + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + // Handle different indexing depending on __LOOP_EXCHANGE +#ifdef __LOOP_EXCHANGE + 
p_vert_out_h[vert_at(ic, ik, ib)] = static_cast<OutType>(0.0); +#else + p_vert_out_h[vert_at(ik, ic, ib)] = static_cast<OutType>(0.0); +#endif + } + } + } + + // Copy to device + Kokkos::deep_copy(this->p_cell_in, p_cell_in_h); + Kokkos::deep_copy(this->vert_cell_idx, vert_cell_idx_h); + Kokkos::deep_copy(this->vert_cell_blk, vert_cell_blk_h); + Kokkos::deep_copy(this->coeff_int, coeff_int_h); + Kokkos::deep_copy(this->p_vert_out, p_vert_out_h); + // Call the function cells2verts_scalar_ri_lib<InType, OutType>( this->p_cell_in.data(), this->vert_cell_idx.data(), @@ -507,25 +1814,195 @@ TYPED_TEST(Cells2vertsriScalarLibTestFixture, Cells2VertsRI) { this->i_startidx, this->i_endidx, this->slev, this->elev, this->nproma, this->nlev, this->nblks_c, this->nblks_v, this->lacc, this->acc_async); - // Check the outputs only for blocks in the range - // { [i_startblk, i_endblk], [slev,elev], [i_startidx_in, i_endidx_in] } - for (int block = this->i_startblk; block <= this->i_endblk; ++block) { - for (int level = this->slev; level < this->elev; ++level) { - for (int i = this->i_startidx; i < this->i_endidx; ++i) { - // Compute the linear index for a 3D array in column-major order: + // Copy results back to host + Kokkos::deep_copy(p_vert_out_h, this->p_vert_out); + + // Expected results based on the specific test values + std::vector<OutType> expected_verts(12); + int idx = 0; + std::generate(expected_verts.begin(), expected_verts.end(), [&idx]() { + OutType values[] = { #ifdef __LOOP_EXCHANGE - size_t idx = - i + level * this->nproma + block * this->nproma * this->nlev; + 1.7459, 1.7159, 1.8609, 1.8309, 1.9759, 1.9459, + 1.7456, 1.7156, 1.8606, 1.8306, 1.9756, 1.9456 #else - size_t idx = level + i * this->nlev + block * this->nproma * this->nlev; + 1.7459, 1.8609, 1.9759, 1.7159, 1.8309, 1.9459, + 1.7456, 1.8606, 1.9756, 1.7156, 1.8306, 1.9456 +#endif + }; + return values[idx++]; + }); + + std::cout << "p_vert_out_h: " << std::endl; + // print out the array p_vert_out_h in 
one line + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jv = this->i_startidx; jv <= this->i_endidx; ++jv) { + // std::cout << p_vert_out_h[vert_at(jk, jv, jb)] << ", "; + std::cout << p_vert_out_h[vert_at(jv, jk, jb)] << ", "; + } + } + } + + // Verify results - using the appropriate indexing depending on __LOOP_EXCHANGE + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(this->i_startidx, this->i_endidx, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jv = i_startidx; jv <= i_endidx; ++jv) { +#ifdef __LOOP_EXCHANGE + EXPECT_NEAR(p_vert_out_h[vert_at(jv, jk, jb)], + expected_verts[vert_at(jv, jk, jb)], + static_cast<OutType>(1e-5)) + << "Failure at block " << jb << ", level " << jk << ", index " << jv; +#else + EXPECT_NEAR(p_vert_out_h[vert_at(jk, jv, jb)], + expected_verts[vert_at(jk, jv, jb)], + static_cast<OutType>(1e-5)) + << "Failure at block " << jb << ", level " << jk << ", index " << jv; #endif - // Since every contribution is 1 and there are 6 stencil points, - // expect 6. 
- EXPECT_NEAR(this->p_vert_out[idx], static_cast<OutType>(6), - static_cast<OutType>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " - << i; } } } } + +TYPED_TEST(Cells2vertsriScalarLibTestFixture, Cells2VertsRIRandom) { + using InType = typename TestFixture::InType; + using OutType = typename TestFixture::OutType; + + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_v = this->nblks_v; + + // Define indexing helpers + const auto &cell_at = at<nproma, nlev, nblks_c>; + const auto &idx_at = at<nproma, nblks_v, 6>; + const auto &blk_at = at<nproma, nblks_v, 6>; + const auto &coeff_at = at<nproma, 6, nblks_v>; + + // For output, we need to handle different layouts depending on __LOOP_EXCHANGE +#ifdef __LOOP_EXCHANGE + const auto &vert_at = at<nproma, nlev, nblks_v>; // jv, jk, jb order +#else + const auto &vert_at = at<nlev, nproma, nblks_v>; // jk, jv, jb order +#endif + + // Create host mirror views + auto p_cell_in_h = Kokkos::create_mirror_view(this->p_cell_in); + auto vert_cell_idx_h = Kokkos::create_mirror_view(this->vert_cell_idx); + auto vert_cell_blk_h = Kokkos::create_mirror_view(this->vert_cell_blk); + auto coeff_int_h = Kokkos::create_mirror_view(this->coeff_int); + auto p_vert_out_h = Kokkos::create_mirror_view(this->p_vert_out); + + // Use fixed seed for reproducibility + std::mt19937 gen(42); + std::uniform_int_distribution<int> cell_distrib(0, nproma - 1); + std::uniform_int_distribution<int> block_distrib(0, nblks_c - 1); + std::uniform_real_distribution<double> real_distrib(0.01, 1.0); + std::uniform_real_distribution<double> coeff_distrib(0.01, 0.3); // Keep coefficients reasonable + + // Initialize with random values + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + p_cell_in_h[cell_at(ic, ik, ib)] = static_cast<InType>(real_distrib(gen)); + } + } + } + + // 
Initialize vertex connectivity indices with random values + for (int ib = 0; ib < nblks_v; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + // Each vertex is connected to 6 cells + for (int j = 0; j < 6; ++j) { + vert_cell_idx_h[idx_at(ic, ib, j)] = cell_distrib(gen); + vert_cell_blk_h[blk_at(ic, ib, j)] = block_distrib(gen); + + // Normalized coefficients + coeff_int_h[coeff_at(ic, j, ib)] = static_cast<InType>(coeff_distrib(gen)); + } + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + // Handle different indexing depending on __LOOP_EXCHANGE +#ifdef __LOOP_EXCHANGE + p_vert_out_h[vert_at(ic, ik, ib)] = static_cast<OutType>(0.0); +#else + p_vert_out_h[vert_at(ik, ic, ib)] = static_cast<OutType>(0.0); +#endif + } + } + } + + // Copy to device + Kokkos::deep_copy(this->p_cell_in, p_cell_in_h); + Kokkos::deep_copy(this->vert_cell_idx, vert_cell_idx_h); + Kokkos::deep_copy(this->vert_cell_blk, vert_cell_blk_h); + Kokkos::deep_copy(this->coeff_int, coeff_int_h); + Kokkos::deep_copy(this->p_vert_out, p_vert_out_h); + + // Call the function + cells2verts_scalar_ri_lib<InType, OutType>( + this->p_cell_in.data(), this->vert_cell_idx.data(), + this->vert_cell_blk.data(), this->coeff_int.data(), + this->p_vert_out.data(), this->i_startblk, this->i_endblk, + this->i_startidx, this->i_endidx, this->slev, this->elev, + nproma, nlev, nblks_c, nblks_v, this->lacc, this->acc_async); + + // Copy results back to host + Kokkos::deep_copy(p_vert_out_h, this->p_vert_out); + + // Prepare expected results storage + using host_space = Kokkos::HostSpace; + Kokkos::View<OutType***, host_space> expected_verts("expected_verts", nproma, nlev, nblks_v); + + // Compute expected values + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + // Get the actual indices to process for this block + int i_startidx, i_endidx; + get_indices_v_lib(this->i_startidx, this->i_endidx, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = 
this->slev; jk <= this->elev; ++jk) { + for (int jv = i_startidx; jv <= i_endidx; ++jv) { + expected_verts(jv, jk, jb) = static_cast<OutType>(0.0); + + for (int j = 0; j < 6; ++j) { + int cell_idx = vert_cell_idx_h[idx_at(jv, jb, j)]; + int cell_blk = vert_cell_blk_h[blk_at(jv, jb, j)]; + InType coeff = coeff_int_h[coeff_at(jv, j, jb)]; + + expected_verts(jv, jk, jb) += + static_cast<OutType>(coeff * p_cell_in_h[cell_at(cell_idx, jk, cell_blk)]); + } + } + } + } + + // Verify results - using the appropriate indexing depending on __LOOP_EXCHANGE + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(this->i_startidx, this->i_endidx, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jv = i_startidx; jv <= i_endidx; ++jv) { +#ifdef __LOOP_EXCHANGE + EXPECT_NEAR(p_vert_out_h[vert_at(jv, jk, jb)], + expected_verts(jv, jk, jb), + static_cast<OutType>(1e-5)) + << "Failure at block " << jb << ", level " << jk << ", index " << jv; +#else + EXPECT_NEAR(p_vert_out_h[vert_at(jk, jv, jb)], + expected_verts(jv, jk, jb), + static_cast<OutType>(1e-5)) + << "Failure at block " << jb << ", level " << jk << ", index " << jv; +#endif + } + } + } +} + -- GitLab From e11d455517806132b15682d57d47623c3d074950 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 8 Apr 2025 18:28:45 +0200 Subject: [PATCH 19/34] fixed few bugs in mo_lib_intp_rbf --- src/interpolation/mo_lib_intp_rbf.cpp | 82 +++++++++++++-------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp index d1178a6..ce6e238 100644 --- a/src/interpolation/mo_lib_intp_rbf.cpp +++ b/src/interpolation/mo_lib_intp_rbf.cpp @@ -180,62 +180,62 @@ void rbf_interpol_c2grad_lib(const T *p_cell_in, const int *rbf_c2grad_idx, "rbf_interpol_c2grad", innerPolicy, KOKKOS_LAMBDA(const 
int jk, const int jc) { grad_x_view(jc, jk, jb) = - rbf_c2grad_coeff_view(0, 1, jc, jb) * p_cell_in_view(jc, jk, jb) + - rbf_c2grad_coeff_view(1, 1, jc, jb) * + rbf_c2grad_coeff_view(0, 0, jc, jb) * p_cell_in_view(jc, jk, jb) + + rbf_c2grad_coeff_view(1, 0, jc, jb) * p_cell_in_view(rbf_c2grad_idx_view(1, jc, jb), jk, rbf_c2grad_blk_view(1, jc, jb)) + - rbf_c2grad_coeff_view(2, 1, jc, jb) * + rbf_c2grad_coeff_view(2, 0, jc, jb) * p_cell_in_view(rbf_c2grad_idx_view(2, jc, jb), jk, rbf_c2grad_blk_view(2, jc, jb)) + - rbf_c2grad_coeff_view(3, 1, jc, jb) * + rbf_c2grad_coeff_view(3, 0, jc, jb) * p_cell_in_view(rbf_c2grad_idx_view(3, jc, jb), jk, rbf_c2grad_blk_view(3, jc, jb)) + - rbf_c2grad_coeff_view(4, 1, jc, jb) * + rbf_c2grad_coeff_view(4, 0, jc, jb) * p_cell_in_view(rbf_c2grad_idx_view(4, jc, jb), jk, rbf_c2grad_blk_view(4, jc, jb)) + - rbf_c2grad_coeff_view(5, 1, jc, jb) * + rbf_c2grad_coeff_view(5, 0, jc, jb) * p_cell_in_view(rbf_c2grad_idx_view(5, jc, jb), jk, rbf_c2grad_blk_view(5, jc, jb)) + - rbf_c2grad_coeff_view(6, 1, jc, jb) * + rbf_c2grad_coeff_view(6, 0, jc, jb) * p_cell_in_view(rbf_c2grad_idx_view(6, jc, jb), jk, rbf_c2grad_blk_view(6, jc, jb)) + - rbf_c2grad_coeff_view(7, 1, jc, jb) * + rbf_c2grad_coeff_view(7, 0, jc, jb) * p_cell_in_view(rbf_c2grad_idx_view(7, jc, jb), jk, rbf_c2grad_blk_view(7, jc, jb)) + - rbf_c2grad_coeff_view(8, 1, jc, jb) * + rbf_c2grad_coeff_view(8, 0, jc, jb) * p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk, rbf_c2grad_blk_view(8, jc, jb)) + - rbf_c2grad_coeff_view(9, 1, jc, jb) * + rbf_c2grad_coeff_view(9, 0, jc, jb) * p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk, rbf_c2grad_blk_view(9, jc, jb)); grad_y_view(jc, jk, jb) = - rbf_c2grad_coeff_view(0, 2, jc, jb) * p_cell_in_view(jc, jk, jb) + - rbf_c2grad_coeff_view(1, 2, jc, jb) * + rbf_c2grad_coeff_view(0, 1, jc, jb) * p_cell_in_view(jc, jk, jb) + + rbf_c2grad_coeff_view(1, 1, jc, jb) * p_cell_in_view(rbf_c2grad_idx_view(1, jc, jb), jk, rbf_c2grad_blk_view(1, jc, jb)) 
+ - rbf_c2grad_coeff_view(2, 2, jc, jb) * + rbf_c2grad_coeff_view(2, 1, jc, jb) * p_cell_in_view(rbf_c2grad_idx_view(2, jc, jb), jk, rbf_c2grad_blk_view(2, jc, jb)) + - rbf_c2grad_coeff_view(3, 2, jc, jb) * + rbf_c2grad_coeff_view(3, 1, jc, jb) * p_cell_in_view(rbf_c2grad_idx_view(3, jc, jb), jk, rbf_c2grad_blk_view(3, jc, jb)) + - rbf_c2grad_coeff_view(4, 2, jc, jb) * + rbf_c2grad_coeff_view(4, 1, jc, jb) * p_cell_in_view(rbf_c2grad_idx_view(4, jc, jb), jk, rbf_c2grad_blk_view(4, jc, jb)) + - rbf_c2grad_coeff_view(5, 2, jc, jb) * + rbf_c2grad_coeff_view(5, 1, jc, jb) * p_cell_in_view(rbf_c2grad_idx_view(5, jc, jb), jk, rbf_c2grad_blk_view(5, jc, jb)) + - rbf_c2grad_coeff_view(6, 2, jc, jb) * + rbf_c2grad_coeff_view(6, 1, jc, jb) * p_cell_in_view(rbf_c2grad_idx_view(6, jc, jb), jk, rbf_c2grad_blk_view(6, jc, jb)) + - rbf_c2grad_coeff_view(7, 2, jc, jb) * + rbf_c2grad_coeff_view(7, 1, jc, jb) * p_cell_in_view(rbf_c2grad_idx_view(7, jc, jb), jk, rbf_c2grad_blk_view(7, jc, jb)) + - rbf_c2grad_coeff_view(8, 2, jc, jb) * + rbf_c2grad_coeff_view(8, 1, jc, jb) * p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk, rbf_c2grad_blk_view(8, jc, jb)) + - rbf_c2grad_coeff_view(9, 2, jc, jb) * + rbf_c2grad_coeff_view(9, 1, jc, jb) * p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk, rbf_c2grad_blk_view(9, jc, jb)); }); @@ -270,10 +270,10 @@ void rbf_vec_interpol_cell_lib(const T *p_vn_in, const int *rbf_vec_idx_c, nblks_c); UnmanagedConstInt3D rbf_vec_blk_c_view(rbf_vec_blk_c, rbf_vec_dim_c, nproma, nblks_c); - UnmanagedConstT4D rbf_vec_coeff_c_view(rbf_vec_coeff_c, nproma, - nblks_c); // TODO + UnmanagedConstT4D rbf_vec_coeff_c_view(rbf_vec_coeff_c, rbf_vec_dim_c, 2, nproma, + nblks_c); UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblks_c); - UnmanagedT3D p_v_out_view(p_u_out, nproma, nlev, nblks_c); + UnmanagedT3D p_v_out_view(p_v_out, nproma, nlev, nblks_c); for (int jb = i_startblk; jb <= i_endblk; ++jb) { @@ -288,60 +288,60 @@ void rbf_vec_interpol_cell_lib(const T 
*p_vn_in, const int *rbf_vec_idx_c, "rbf_vec_interpol_cell_lib", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { p_u_out_view(jc, jk, jb) = - rbf_vec_coeff_c_view(0, 1, jc, jb) * + rbf_vec_coeff_c_view(0, 0, jc, jb) * p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk, rbf_vec_blk_c_view(0, jc, jb)) + - rbf_vec_coeff_c_view(1, 1, jc, jb) * + rbf_vec_coeff_c_view(1, 0, jc, jb) * p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk, rbf_vec_blk_c_view(1, jc, jb)) + - rbf_vec_coeff_c_view(2, 1, jc, jb) * + rbf_vec_coeff_c_view(2, 0, jc, jb) * p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk, rbf_vec_blk_c_view(2, jc, jb)) + - rbf_vec_coeff_c_view(3, 1, jc, jb) * + rbf_vec_coeff_c_view(3, 0, jc, jb) * p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk, rbf_vec_blk_c_view(3, jc, jb)) + - rbf_vec_coeff_c_view(4, 1, jc, jb) * + rbf_vec_coeff_c_view(4, 0, jc, jb) * p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk, rbf_vec_blk_c_view(4, jc, jb)) + - rbf_vec_coeff_c_view(5, 1, jc, jb) * + rbf_vec_coeff_c_view(5, 0, jc, jb) * p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk, rbf_vec_blk_c_view(5, jc, jb)) + - rbf_vec_coeff_c_view(6, 1, jc, jb) * + rbf_vec_coeff_c_view(6, 0, jc, jb) * p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk, rbf_vec_blk_c_view(6, jc, jb)) + - rbf_vec_coeff_c_view(7, 1, jc, jb) * + rbf_vec_coeff_c_view(7, 0, jc, jb) * p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk, rbf_vec_blk_c_view(7, jc, jb)) + - rbf_vec_coeff_c_view(8, 1, jc, jb) * + rbf_vec_coeff_c_view(8, 0, jc, jb) * p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, rbf_vec_blk_c_view(8, jc, jb)); p_v_out_view(jc, jk, jb) = - rbf_vec_coeff_c_view(0, 2, jc, jb) * + rbf_vec_coeff_c_view(0, 1, jc, jb) * p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk, rbf_vec_blk_c_view(0, jc, jb)) + - rbf_vec_coeff_c_view(1, 2, jc, jb) * + rbf_vec_coeff_c_view(1, 1, jc, jb) * p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk, rbf_vec_blk_c_view(1, jc, jb)) + - rbf_vec_coeff_c_view(2, 2, jc, jb) * + rbf_vec_coeff_c_view(2, 1, jc, jb) 
* p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk, rbf_vec_blk_c_view(2, jc, jb)) + - rbf_vec_coeff_c_view(3, 2, jc, jb) * + rbf_vec_coeff_c_view(3, 1, jc, jb) * p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk, rbf_vec_blk_c_view(3, jc, jb)) + - rbf_vec_coeff_c_view(4, 2, jc, jb) * + rbf_vec_coeff_c_view(4, 1, jc, jb) * p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk, rbf_vec_blk_c_view(4, jc, jb)) + - rbf_vec_coeff_c_view(5, 2, jc, jb) * + rbf_vec_coeff_c_view(5, 1, jc, jb) * p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk, rbf_vec_blk_c_view(5, jc, jb)) + - rbf_vec_coeff_c_view(6, 2, jc, jb) * + rbf_vec_coeff_c_view(6, 1, jc, jb) * p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk, rbf_vec_blk_c_view(6, jc, jb)) + - rbf_vec_coeff_c_view(7, 2, jc, jb) * + rbf_vec_coeff_c_view(7, 1, jc, jb) * p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk, rbf_vec_blk_c_view(7, jc, jb)) + - rbf_vec_coeff_c_view(8, 2, jc, jb) * + rbf_vec_coeff_c_view(8, 1, jc, jb) * p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, rbf_vec_blk_c_view(8, jc, jb)); }); -- GitLab From 677c30b83f322f46621c7907f93a14d10812df3b Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 8 Apr 2025 18:29:11 +0200 Subject: [PATCH 20/34] made an overhaul of test_intp_rbf --- test/c/CMakeLists.txt | 2 +- test/c/test_intp_rbf.cpp | 1271 ++++++++++++++++++++++++++++++++------ 2 files changed, 1080 insertions(+), 193 deletions(-) diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index e707970..175b226 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -34,7 +34,7 @@ set(SOURCES test_horizontal_rot.cpp # test_tdma_solver.cpp test_interpolation_vector.cpp - # test_intp_rbf.cpp + test_intp_rbf.cpp test_interpolation_scalar.cpp ) # Create the test executable from your test files, including main.cpp. 
diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp index 040d440..af72e65 100644 --- a/test/c/test_intp_rbf.cpp +++ b/test/c/test_intp_rbf.cpp @@ -15,15 +15,9 @@ #include <gtest/gtest.h> #include <numeric> #include <vector> - -// Free-function helpers for 3D and 4D array sizes (assumed column-major) -template <typename T> size_t num_elements_3d(int d1, int d2, int d3) { - return static_cast<size_t>(d1) * d2 * d3; -} - -template <typename T> size_t num_elements_4d(int d1, int d2, int d3, int d4) { - return static_cast<size_t>(d1) * d2 * d3 * d4; -} +#include <random> +#include <iostream> +#include "dim_helper.hpp" // Define a helper struct that holds the two types. template <typename InT, typename OutT> struct MixedPrecision { @@ -31,6 +25,9 @@ template <typename InT, typename OutT> struct MixedPrecision { using out_type = OutT; }; +// Define the list of types we want to test. +typedef ::testing::Types<float, double> MyTypes; + // Define the list of type pairs we want to test. typedef ::testing::Types<MixedPrecision<double, double>, MixedPrecision<double, float>, @@ -48,6 +45,7 @@ public: static constexpr int rbf_c2grad_dim = 10; // fixed dimension static constexpr int rbf_vec_dim_c = 9; static constexpr int rbf_vec_dim_e = 4; + static constexpr int rbf_vec_dim_v = 6; // Fixed dimension for RBF // Parameter values. const int i_startblk = 0; @@ -60,244 +58,1133 @@ public: const bool acc_async = false; // No asynchronous execution. }; -// Define a typed test fixture for the functions which have the same input and -// output types template <typename T> -class RbfInterpolTypedTestFixture : public ::testing::Test, +class RbfInterpolSingleParamTest : public ::testing::Test, public interp_dimensions { public: - // Data arrays. 
- std::vector<T> p_cell_in; // size: nproma * nlev * nblks_c - std::vector<int> rbf_c2grad_idx; // size: rbf_c2grad_dim * nproma * nblks_c - std::vector<int> rbf_c2grad_blk; // size: rbf_c2grad_dim * nproma * nblks_c - std::vector<int> rbf_vec_idx_c; // size: rbf_vec_dim_c * nproma * nblks_c - std::vector<int> rbf_vec_blk_c; // size: rbf_vec_dim_c * nproma * nblks_c - std::vector<T> - rbf_c2grad_coeff; // size: rbf_c2grad_dim * 2 * nproma * nblks_c - std::vector<T> grad_x; // size: nproma * nlev * nblks_c - std::vector<T> grad_y; // size: nproma * nlev * nblks_c - std::vector<T> p_vn_in; - std::vector<T> rbf_vec_coeff_c; - std::vector<T> p_u_out; - std::vector<T> p_v_out; - - std::vector<int> rbf_vec_idx_e; - std::vector<int> rbf_vec_blk_e; - std::vector<T> rbf_vec_coeff_e; - std::vector<T> p_vt_out; - - RbfInterpolTypedTestFixture() { - size_t size3d = static_cast<size_t>(nproma) * nlev * nblks_c; - size_t size3d_idx = static_cast<size_t>(rbf_c2grad_dim) * nproma * nblks_c; - size_t size4d = static_cast<size_t>(rbf_c2grad_dim) * 2 * nproma * nblks_c; - - size_t size3d_vec_dim = - static_cast<size_t>(rbf_vec_dim_c) * nproma * nblks_c; - size_t size_4d_vec_dim = - static_cast<size_t>(rbf_vec_dim_c) * 2 * nproma * nblks_c; - - size_t size3d_edge_lib = - static_cast<size_t>(rbf_vec_dim_e) * nproma * nblks_c; - size_t size_4d_edge_lib = - static_cast<size_t>(rbf_vec_dim_e) * 2 * nproma * nblks_c; - - p_cell_in.resize(size3d, static_cast<T>(1)); - p_vn_in.resize(size3d, static_cast<T>(1)); - - rbf_vec_idx_c.resize(size3d_vec_dim, 1); - rbf_vec_blk_c.resize(size3d_vec_dim, 0); - rbf_c2grad_idx.resize(size3d_idx, 1); - rbf_c2grad_blk.resize(size3d_idx, 0); // Set block indices to 0 for testing. 
- rbf_vec_idx_e.resize(size3d_vec_dim, 1); - rbf_vec_blk_e.resize(size3d_vec_dim, 0); - - rbf_vec_coeff_c.resize(size_4d_vec_dim, static_cast<T>(1)); - rbf_c2grad_coeff.resize(size4d, static_cast<T>(1)); - rbf_vec_coeff_e.resize(size_4d_edge_lib, static_cast<T>(1)); - - p_u_out.resize(size3d_vec_dim, static_cast<T>(0)); - p_v_out.resize(size3d_vec_dim, static_cast<T>(0)); - p_vt_out.resize(size3d_edge_lib, static_cast<T>(0)); - - grad_x.resize(size3d, static_cast<T>(0)); - grad_y.resize(size3d, static_cast<T>(0)); - } + // Using Kokkos execution and memory spaces + using exec_space = Kokkos::DefaultExecutionSpace; + using memory_space = exec_space::memory_space; + + // Kokkos Views for test data + Kokkos::View<T*, memory_space> p_cell_in; // Dimensions: (nproma, nlev, nblks_c) + Kokkos::View<int*, memory_space> rbf_c2grad_idx; // Dimensions: (rbf_c2grad_dim, nproma, nblks_c) + Kokkos::View<int*, memory_space> rbf_c2grad_blk; // Dimensions: (rbf_c2grad_dim, nproma, nblks_c) + Kokkos::View<T*, memory_space> rbf_c2grad_coeff; // Dimensions: (rbf_c2grad_dim, 2, nproma, nblks_c) + Kokkos::View<T*, memory_space> grad_x; // Dimensions: (nproma, nlev, nblks_c) + Kokkos::View<T*, memory_space> grad_y; // Dimensions: (nproma, nlev, nblks_c) + + // Additional arrays for other functions + Kokkos::View<T*, memory_space> p_vn_in; + Kokkos::View<int*, memory_space> rbf_vec_idx_c; + Kokkos::View<int*, memory_space> rbf_vec_blk_c; + Kokkos::View<T*, memory_space> rbf_vec_coeff_c; + Kokkos::View<T*, memory_space> p_u_out; + Kokkos::View<T*, memory_space> p_v_out; + + Kokkos::View<int*, memory_space> rbf_vec_idx_e; + Kokkos::View<int*, memory_space> rbf_vec_blk_e; + Kokkos::View<T*, memory_space> rbf_vec_coeff_e; + Kokkos::View<T*, memory_space> p_vt_out; + + RbfInterpolSingleParamTest() + : p_cell_in("p_cell_in", nproma * nlev * nblks_c), + rbf_c2grad_idx("rbf_c2grad_idx", rbf_c2grad_dim * nproma * nblks_c), + rbf_c2grad_blk("rbf_c2grad_blk", rbf_c2grad_dim * nproma * nblks_c), + 
rbf_c2grad_coeff("rbf_c2grad_coeff", rbf_c2grad_dim * 2 * nproma * nblks_c), + grad_x("grad_x", nproma * nlev * nblks_c), + grad_y("grad_y", nproma * nlev * nblks_c), + + p_vn_in("p_vn_in", nproma * nlev * nblks_c), + rbf_vec_idx_c("rbf_vec_idx_c", rbf_vec_dim_c * nproma * nblks_c), + rbf_vec_blk_c("rbf_vec_blk_c", rbf_vec_dim_c * nproma * nblks_c), + rbf_vec_coeff_c("rbf_vec_coeff_c", rbf_vec_dim_c * 2 * nproma * nblks_c), + p_u_out("p_u_out", nproma * nlev * nblks_c), + p_v_out("p_v_out", nproma * nlev * nblks_c), + + rbf_vec_idx_e("rbf_vec_idx_e", rbf_vec_dim_e * nproma * nblks_c), + rbf_vec_blk_e("rbf_vec_blk_e", rbf_vec_dim_e * nproma * nblks_c), + rbf_vec_coeff_e("rbf_vec_coeff_e", rbf_vec_dim_e * 2 * nproma * nblks_c), + p_vt_out("p_vt_out", nproma * nlev * nblks_c) + {} }; -typedef ::testing::Types<float, double> MyTypes; +TYPED_TEST_SUITE(RbfInterpolSingleParamTest, MyTypes); + +//////////////////////////////////////////////////////////////////////////////// +// +// ! rbf_interpol_c2grad +// +//////////////////////////////////////////////////////////////////////////////// + +TYPED_TEST(RbfInterpolSingleParamTest, C2GradSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int rbf_c2grad_dim = this->rbf_c2grad_dim; + + // Define indexing helpers + const auto &cell_at = at<nproma, nlev, nblks_c>; + const auto &idx_at = at<rbf_c2grad_dim, nproma, nblks_c>; + const auto &blk_at = at<rbf_c2grad_dim, nproma, nblks_c>; + const auto &coeff_at = at<rbf_c2grad_dim, 2, nproma, nblks_c>; + const auto &grad_at = at<nproma, nlev, nblks_c>; + + // Create host mirror views + auto p_cell_in_h = Kokkos::create_mirror_view(this->p_cell_in); + auto rbf_c2grad_idx_h = Kokkos::create_mirror_view(this->rbf_c2grad_idx); + auto rbf_c2grad_blk_h = Kokkos::create_mirror_view(this->rbf_c2grad_blk); + auto rbf_c2grad_coeff_h = Kokkos::create_mirror_view(this->rbf_c2grad_coeff); + auto grad_x_h = 
Kokkos::create_mirror_view(this->grad_x); + auto grad_y_h = Kokkos::create_mirror_view(this->grad_y); -TYPED_TEST_SUITE(RbfInterpolTypedTestFixture, MyTypes); + // Initialize with index-based pattern for cell data + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + // Value depends on all three indices + p_cell_in_h[cell_at(ic, ik, ib)] = static_cast<TypeParam>(1.0 + ic + ik * 0.1 + ib * 0.01); + } + } + } + + // Initialize connectivity indices with specific pattern + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + // First index points to self + rbf_c2grad_idx_h[idx_at(0, ic, ib)] = ic; + rbf_c2grad_blk_h[blk_at(0, ic, ib)] = ib; + + // Other indices follow a pattern + for (int j = 1; j < rbf_c2grad_dim; ++j) { + rbf_c2grad_idx_h[idx_at(j, ic, ib)] = (ic + j) % nproma; + rbf_c2grad_blk_h[blk_at(j, ic, ib)] = (ib + j % nblks_c) % nblks_c; + } + + // Coefficients for x and y gradients - use a simple pattern that depends on ib, ic and j + for (int j = 0; j < rbf_c2grad_dim; ++j) { + rbf_c2grad_coeff_h[coeff_at(j, 0, ic, ib)] = static_cast<TypeParam>(1.0 + ib + 0.1*ic + 0.01 * j); // x coefficient + rbf_c2grad_coeff_h[coeff_at(j, 1, ic, ib)] = static_cast<TypeParam>(2.0 + ib + 0.1*ic + 0.01 * j); // y coefficient + } + } + } -TYPED_TEST(RbfInterpolTypedTestFixture, C2Grad) { - using T = TypeParam; + // Initialize gradients to zero + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + grad_x_h[grad_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + grad_y_h[grad_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + } + } + } + + // Copy to device + Kokkos::deep_copy(this->p_cell_in, p_cell_in_h); + Kokkos::deep_copy(this->rbf_c2grad_idx, rbf_c2grad_idx_h); + Kokkos::deep_copy(this->rbf_c2grad_blk, rbf_c2grad_blk_h); + Kokkos::deep_copy(this->rbf_c2grad_coeff, rbf_c2grad_coeff_h); + 
Kokkos::deep_copy(this->grad_x, grad_x_h); + Kokkos::deep_copy(this->grad_y, grad_y_h); + + Kokkos::fence(); + + // Call the function rbf_interpol_c2grad_lib<TypeParam>( this->p_cell_in.data(), this->rbf_c2grad_idx.data(), this->rbf_c2grad_blk.data(), this->rbf_c2grad_coeff.data(), this->grad_x.data(), this->grad_y.data(), this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->rbf_c2grad_dim, this->nlev, this->nblks_c, - this->lacc); - - // For each block from i_startblk to i_endblk-1, and for each (i, level) - // the kernel sums rbf_c2grad_dim contributions, each equal to 1. - // Therefore, we expect grad_x and grad_y to equal rbf_c2grad_dim (i.e., 10). - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - for (int jk = 0; jk < this->nlev; ++jk) { - for (int i = 0; i < this->nproma; ++i) { - size_t idx = i + static_cast<size_t>(jk) * this->nproma + - static_cast<size_t>(jb) * this->nproma * this->nlev; - EXPECT_NEAR(this->grad_x[idx], - static_cast<TypeParam>(this->rbf_c2grad_dim), - static_cast<TypeParam>(1e-5)) - << "grad_x failure at block " << jb << ", level " << jk - << ", index " << i; - EXPECT_NEAR(this->grad_y[idx], - static_cast<TypeParam>(this->rbf_c2grad_dim), - static_cast<TypeParam>(1e-5)) - << "grad_y failure at block " << jb << ", level " << jk - << ", index " << i; + this->elev, nproma, rbf_c2grad_dim, nlev, nblks_c, this->lacc); + + Kokkos::fence(); + + // Copy results back to host + Kokkos::deep_copy(grad_x_h, this->grad_x); + Kokkos::deep_copy(grad_y_h, this->grad_y); + + // Expected results based on the specific test values + std::vector<TypeParam> expected_grad_x(24); + std::vector<TypeParam> expected_grad_y(24); + int idx = 0; + std::generate(expected_grad_x.begin(), expected_grad_x.end(), [&idx]() { + TypeParam values[] = { + 19.9225, 22.9275, 26.2225, 20.9675, 24.0725, 27.4675, + 22.0125, 25.2175, 28.7125, 23.0575, 26.3625, 29.9575, + 38.972, 42.977, 47.272, 41.017, 
45.122, 49.517, + 43.062, 47.267, 51.762, 45.107, 49.412, 54.007 + }; + return values[idx++]; + }); + + idx = 0; + std::generate(expected_grad_y.begin(), expected_grad_y.end(), [&idx]() { + TypeParam values[] = { + 38.9725, 42.9775, 47.2725, 41.0175, 45.1225, 49.5175, + 43.0625, 47.2675, 51.7625, 45.1075, 49.4125, 54.0075, + 58.022, 63.027, 68.322, 61.067, 66.172, 71.567, + 64.112, 69.317, 74.812, 67.157, 72.462, 78.057 + }; + return values[idx++]; + }); + + // Verify results + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { + EXPECT_NEAR(grad_x_h[grad_at(jc, jk, jb)], + expected_grad_x[grad_at(jc, jk, jb)], + static_cast<TypeParam>(1e-5)) + << "grad_x failure at block " << jb << ", level " << jk << ", index " << jc; + EXPECT_NEAR(grad_y_h[grad_at(jc, jk, jb)], + expected_grad_y[grad_at(jc, jk, jb)], + static_cast<TypeParam>(1e-5)) + << "grad_y failure at block " << jb << ", level " << jk << ", index " << jc; } } } } -TYPED_TEST(RbfInterpolTypedTestFixture, Cell) { - using T = TypeParam; +TYPED_TEST(RbfInterpolSingleParamTest, C2GradRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int rbf_c2grad_dim = this->rbf_c2grad_dim; + + // Define indexing helpers + const auto &cell_at = at<nproma, nlev, nblks_c>; + const auto &idx_at = at<rbf_c2grad_dim, nproma, nblks_c>; + const auto &blk_at = at<rbf_c2grad_dim, nproma, nblks_c>; + const auto &coeff_at = at<rbf_c2grad_dim, 2, nproma, nblks_c>; + const auto &grad_at = at<nproma, nlev, nblks_c>; + + // Create host mirror views + auto p_cell_in_h = Kokkos::create_mirror_view(this->p_cell_in); + auto rbf_c2grad_idx_h = 
Kokkos::create_mirror_view(this->rbf_c2grad_idx); + auto rbf_c2grad_blk_h = Kokkos::create_mirror_view(this->rbf_c2grad_blk); + auto rbf_c2grad_coeff_h = Kokkos::create_mirror_view(this->rbf_c2grad_coeff); + auto grad_x_h = Kokkos::create_mirror_view(this->grad_x); + auto grad_y_h = Kokkos::create_mirror_view(this->grad_y); + + // Use fixed seed for reproducibility + std::mt19937 gen(42); + std::uniform_int_distribution<int> cell_distrib(0, nproma - 1); + std::uniform_int_distribution<int> block_distrib(0, nblks_c - 1); + std::uniform_real_distribution<double> real_distrib(0.01, 1.0); + std::uniform_real_distribution<double> coeff_distrib(-0.2, 0.2); // Allow negative coefficients for gradients + + // Initialize with random values + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + p_cell_in_h[cell_at(ic, ik, ib)] = static_cast<TypeParam>(real_distrib(gen)); + } + } + } + + // Initialize connectivity indices with random values + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + // First index points to self + rbf_c2grad_idx_h[idx_at(0, ic, ib)] = ic; + rbf_c2grad_blk_h[blk_at(0, ic, ib)] = ib; + + // Other indices randomized + for (int j = 1; j < rbf_c2grad_dim; ++j) { + rbf_c2grad_idx_h[idx_at(j, ic, ib)] = cell_distrib(gen); + rbf_c2grad_blk_h[blk_at(j, ic, ib)] = block_distrib(gen); + } + + // Random coefficients for gradient reconstruction + for (int j = 0; j < rbf_c2grad_dim; ++j) { + rbf_c2grad_coeff_h[coeff_at(j, 0, ic, ib)] = static_cast<TypeParam>(coeff_distrib(gen)); // x coefficient + rbf_c2grad_coeff_h[coeff_at(j, 1, ic, ib)] = static_cast<TypeParam>(coeff_distrib(gen)); // y coefficient + } + } + } + + // Initialize gradients to zero + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + grad_x_h[grad_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + grad_y_h[grad_at(ic, ik, ib)] = 
static_cast<TypeParam>(0.0); + } + } + } + + // Copy to device + Kokkos::deep_copy(this->p_cell_in, p_cell_in_h); + Kokkos::deep_copy(this->rbf_c2grad_idx, rbf_c2grad_idx_h); + Kokkos::deep_copy(this->rbf_c2grad_blk, rbf_c2grad_blk_h); + Kokkos::deep_copy(this->rbf_c2grad_coeff, rbf_c2grad_coeff_h); + Kokkos::deep_copy(this->grad_x, grad_x_h); + Kokkos::deep_copy(this->grad_y, grad_y_h); + + Kokkos::fence(); + + // Call the function + rbf_interpol_c2grad_lib<TypeParam>( + this->p_cell_in.data(), this->rbf_c2grad_idx.data(), + this->rbf_c2grad_blk.data(), this->rbf_c2grad_coeff.data(), + this->grad_x.data(), this->grad_y.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, nproma, rbf_c2grad_dim, nlev, nblks_c, this->lacc); + + Kokkos::fence(); - rbf_vec_interpol_cell_lib<T>( + // Copy results back to host + Kokkos::deep_copy(grad_x_h, this->grad_x); + Kokkos::deep_copy(grad_y_h, this->grad_y); + + // Calculate expected values + using host_space = Kokkos::HostSpace; + Kokkos::View<TypeParam***, host_space> expected_grad_x("expected_grad_x", nproma, nlev, nblks_c); + Kokkos::View<TypeParam***, host_space> expected_grad_y("expected_grad_y", nproma, nlev, nblks_c); + + // Compute expected values + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { + expected_grad_x(jc, jk, jb) = static_cast<TypeParam>(0.0); + expected_grad_y(jc, jk, jb) = static_cast<TypeParam>(0.0); + + for (int j = 0; j < rbf_c2grad_dim; ++j) { + int cell_idx = rbf_c2grad_idx_h[idx_at(j, jc, jb)]; + int cell_blk = rbf_c2grad_blk_h[blk_at(j, jc, jb)]; + TypeParam coeff_x = rbf_c2grad_coeff_h[coeff_at(j, 0, jc, jb)]; + TypeParam coeff_y = rbf_c2grad_coeff_h[coeff_at(j, 1, 
jc, jb)]; + + expected_grad_x(jc, jk, jb) += + coeff_x * p_cell_in_h[cell_at(cell_idx, jk, cell_blk)]; + expected_grad_y(jc, jk, jb) += + coeff_y * p_cell_in_h[cell_at(cell_idx, jk, cell_blk)]; + } + } + } + } + + // Verify results + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { + EXPECT_NEAR(grad_x_h[grad_at(jc, jk, jb)], + expected_grad_x(jc, jk, jb), + static_cast<TypeParam>(1e-5)) + << "grad_x failure at block " << jb << ", level " << jk << ", index " << jc; + EXPECT_NEAR(grad_y_h[grad_at(jc, jk, jb)], + expected_grad_y(jc, jk, jb), + static_cast<TypeParam>(1e-5)) + << "grad_y failure at block " << jb << ", level " << jk << ", index " << jc; + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +// +// ! 
rbf_vec_interpol_cell +// +//////////////////////////////////////////////////////////////////////////////// + +TYPED_TEST(RbfInterpolSingleParamTest, CellSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int rbf_vec_dim_c = this->rbf_vec_dim_c; + + // Define indexing helpers + const auto &edge_at = at<nproma, nlev, nblks_e>; + const auto &idx_at = at<rbf_vec_dim_c, nproma, nblks_c>; + const auto &blk_at = at<rbf_vec_dim_c, nproma, nblks_c>; + const auto &coeff_at = at<rbf_vec_dim_c, 2, nproma, nblks_c>; + const auto &cell_at = at<nproma, nlev, nblks_c>; + + // Create host mirror views + auto p_vn_in_h = Kokkos::create_mirror_view(this->p_vn_in); + auto rbf_vec_idx_c_h = Kokkos::create_mirror_view(this->rbf_vec_idx_c); + auto rbf_vec_blk_c_h = Kokkos::create_mirror_view(this->rbf_vec_blk_c); + auto rbf_vec_coeff_c_h = Kokkos::create_mirror_view(this->rbf_vec_coeff_c); + auto p_u_out_h = Kokkos::create_mirror_view(this->p_u_out); + auto p_v_out_h = Kokkos::create_mirror_view(this->p_v_out); + + // Initialize with index-based pattern for edge data + for (int ib = 0; ib < nblks_e; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + // Value depends on all three indices + p_vn_in_h[edge_at(ic, ik, ib)] = static_cast<TypeParam>(1.0 + ic + ik * 0.1 + ib * 0.01); + } + } + } + + // Initialize cell connectivity indices with specific pattern + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + // Each cell connects to rbf_vec_dim_c edges + for (int j = 0; j < rbf_vec_dim_c; ++j) { + // Edge indices with a pattern + rbf_vec_idx_c_h[idx_at(j, ic, ib)] = (ic + j) % nproma; + rbf_vec_blk_c_h[blk_at(j, ic, ib)] = (ib + j % nblks_e) % nblks_e; + + // Interpolation coefficients that depend on indices + rbf_vec_coeff_c_h[coeff_at(j, 0, ic, ib)] = static_cast<TypeParam>(1.0 + ib + 0.1*ic + 
0.01 * j); // x coefficient + rbf_vec_coeff_c_h[coeff_at(j, 1, ic, ib)] = static_cast<TypeParam>(2.0 + ib + 0.1*ic + 0.01 * j); // y coefficient + } + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + p_u_out_h[cell_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + p_v_out_h[cell_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + } + } + } + + // Copy to device + Kokkos::deep_copy(this->p_vn_in, p_vn_in_h); + Kokkos::deep_copy(this->rbf_vec_idx_c, rbf_vec_idx_c_h); + Kokkos::deep_copy(this->rbf_vec_blk_c, rbf_vec_blk_c_h); + Kokkos::deep_copy(this->rbf_vec_coeff_c, rbf_vec_coeff_c_h); + Kokkos::deep_copy(this->p_u_out, p_u_out_h); + Kokkos::deep_copy(this->p_v_out, p_v_out_h); + + Kokkos::fence(); + + // Call the function + rbf_vec_interpol_cell_lib<TypeParam>( this->p_vn_in.data(), this->rbf_vec_idx_c.data(), this->rbf_vec_blk_c.data(), this->rbf_vec_coeff_c.data(), this->p_u_out.data(), this->p_v_out.data(), this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->rbf_c2grad_dim, this->nlev, this->nblks_c, - this->nblks_e, this->lacc, this->acc_async); - - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - for (int jk = 0; jk < this->nlev; ++jk) { - for (int i = 0; i < this->nproma; ++i) { - size_t idx = i + static_cast<size_t>(jk) * this->nproma + - static_cast<size_t>(jb) * this->nproma * this->nlev; - EXPECT_NEAR(this->p_u_out[idx], static_cast<T>(this->rbf_vec_dim_c), - static_cast<T>(1e-5)) - << "p_u_out failure at block " << jb << ", level " << jk - << ", index " << i; + this->elev, nproma, nlev, nblks_c, nblks_e, rbf_vec_dim_c, + this->lacc, this->acc_async); + + Kokkos::fence(); + + // Copy results back to host + Kokkos::deep_copy(p_u_out_h, this->p_u_out); + Kokkos::deep_copy(p_v_out_h, this->p_v_out); + + // Expected results based on the specific test values + std::vector<TypeParam> expected_u(24); + std::vector<TypeParam> expected_v(24); + int idx = 0; + 
std::generate(expected_u.begin(), expected_u.end(), [&idx]() { + TypeParam values[] = { + 18.8216, 20.5356, 22.3396, 19.7576, 21.5616, 23.4556, + 20.6936, 22.5876, 24.5716, 21.6296, 23.6136, 25.6876, + 36.882, 38.597, 40.402, 38.718, 40.523, 42.418, + 40.554, 42.449, 44.434, 42.39, 44.375, 46.45 + }; + return values[idx++]; + }); + idx = 0; + std::generate(expected_v.begin(), expected_v.end(), [&idx]() { + TypeParam values[] = { + 36.8616, 38.5756, 40.3796, 38.6976, 40.5016, 42.3956, + 40.5336, 42.4276, 44.4116, 42.3696, 44.3536, 46.4276, + 54.932, 56.647, 58.452, 57.668, 59.473, 61.368, + 60.404, 62.299, 64.284, 63.14, 65.125, 67.2 + }; + return values[idx++]; + }); + + // Verify results + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { + EXPECT_NEAR(p_u_out_h[cell_at(jc, jk, jb)], + expected_u[cell_at(jc, jk, jb)], + static_cast<TypeParam>(1e-5)) + << "u failure at block " << jb << ", level " << jk << ", index " << jc; + + EXPECT_NEAR(p_v_out_h[cell_at(jc, jk, jb)], + expected_v[cell_at(jc, jk, jb)], + static_cast<TypeParam>(1e-5)) + << "v failure at block " << jb << ", level " << jk << ", index " << jc; } } } } -TYPED_TEST(RbfInterpolTypedTestFixture, Edge) { - using T = TypeParam; +TYPED_TEST(RbfInterpolSingleParamTest, CellRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int rbf_vec_dim_c = this->rbf_vec_dim_c; + + // Define indexing helpers + const auto &edge_at = at<nproma, nlev, nblks_e>; + const auto &idx_at = at<rbf_vec_dim_c, nproma, nblks_c>; + const auto &blk_at = at<rbf_vec_dim_c, nproma, nblks_c>; + const auto &coeff_at = at<rbf_vec_dim_c, 2, 
nproma, nblks_c>; + const auto &cell_at = at<nproma, nlev, nblks_c>; + + // Create host mirror views + auto p_vn_in_h = Kokkos::create_mirror_view(this->p_vn_in); + auto rbf_vec_idx_c_h = Kokkos::create_mirror_view(this->rbf_vec_idx_c); + auto rbf_vec_blk_c_h = Kokkos::create_mirror_view(this->rbf_vec_blk_c); + auto rbf_vec_coeff_c_h = Kokkos::create_mirror_view(this->rbf_vec_coeff_c); + auto p_u_out_h = Kokkos::create_mirror_view(this->p_u_out); + auto p_v_out_h = Kokkos::create_mirror_view(this->p_v_out); + + // Use fixed seed for reproducibility + std::mt19937 gen(43); // Different seed from other tests + std::uniform_int_distribution<int> edge_distrib(0, nproma - 1); + std::uniform_int_distribution<int> block_distrib(0, nblks_e - 1); + std::uniform_real_distribution<double> real_distrib(0.01, 1.0); + std::uniform_real_distribution<double> coeff_distrib(0.01, 0.2); + + // Initialize with random values + for (int ib = 0; ib < nblks_e; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + p_vn_in_h[edge_at(ic, ik, ib)] = static_cast<TypeParam>(real_distrib(gen)); + } + } + } + + // Initialize cell connectivity indices with random values + for (int ib = 0; ib < nblks_c; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + for (int j = 0; j < rbf_vec_dim_c; ++j) { + // Random edge indices and blocks + rbf_vec_idx_c_h[idx_at(j, ic, ib)] = edge_distrib(gen); + rbf_vec_blk_c_h[blk_at(j, ic, ib)] = block_distrib(gen); + // Random coefficients for interpolation + rbf_vec_coeff_c_h[coeff_at(j, 0, ic, ib)] = static_cast<TypeParam>(coeff_distrib(gen)); + rbf_vec_coeff_c_h[coeff_at(j, 1, ic, ib)] = static_cast<TypeParam>(coeff_distrib(gen)); + } + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + p_u_out_h[cell_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + p_v_out_h[cell_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + } + } + } + + // Copy to device + Kokkos::deep_copy(this->p_vn_in, p_vn_in_h); + 
Kokkos::deep_copy(this->rbf_vec_idx_c, rbf_vec_idx_c_h); + Kokkos::deep_copy(this->rbf_vec_blk_c, rbf_vec_blk_c_h); + Kokkos::deep_copy(this->rbf_vec_coeff_c, rbf_vec_coeff_c_h); + Kokkos::deep_copy(this->p_u_out, p_u_out_h); + Kokkos::deep_copy(this->p_v_out, p_v_out_h); + + Kokkos::fence(); + + // Call the function + rbf_vec_interpol_cell_lib<TypeParam>( + this->p_vn_in.data(), this->rbf_vec_idx_c.data(), + this->rbf_vec_blk_c.data(), this->rbf_vec_coeff_c.data(), + this->p_u_out.data(), this->p_v_out.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, nproma, nlev, nblks_c, nblks_e, rbf_vec_dim_c, + this->lacc, this->acc_async); + + Kokkos::fence(); + + // Copy results back to host + Kokkos::deep_copy(p_u_out_h, this->p_u_out); + Kokkos::deep_copy(p_v_out_h, this->p_v_out); + + // Calculate expected values + using host_space = Kokkos::HostSpace; + Kokkos::View<TypeParam***, host_space> expected_u("expected_u", nproma, nlev, nblks_c); + Kokkos::View<TypeParam***, host_space> expected_v("expected_v", nproma, nlev, nblks_c); + + // Compute expected values + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { + expected_u(jc, jk, jb) = static_cast<TypeParam>(0.0); + expected_v(jc, jk, jb) = static_cast<TypeParam>(0.0); + + for (int j = 0; j < rbf_vec_dim_c; ++j) { + int edge_idx = rbf_vec_idx_c_h[idx_at(j, jc, jb)]; + int edge_blk = rbf_vec_blk_c_h[blk_at(j, jc, jb)]; + TypeParam coeff_u = rbf_vec_coeff_c_h[coeff_at(j, 0, jc, jb)]; + TypeParam coeff_v = rbf_vec_coeff_c_h[coeff_at(j, 1, jc, jb)]; + + expected_u(jc, jk, jb) += + coeff_u * p_vn_in_h[edge_at(edge_idx, jk, edge_blk)]; + expected_v(jc, jk, jb) += + coeff_v * 
p_vn_in_h[edge_at(edge_idx, jk, edge_blk)]; + } + } + } + } - rbf_vec_interpol_edge_lib<T>( + // Verify results + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jc = i_startidx; jc <= i_endidx; ++jc) { + EXPECT_NEAR(p_u_out_h[cell_at(jc, jk, jb)], + expected_u(jc, jk, jb), + static_cast<TypeParam>(1e-5)) + << "u failure at block " << jb << ", level " << jk << ", index " << jc; + + EXPECT_NEAR(p_v_out_h[cell_at(jc, jk, jb)], + expected_v(jc, jk, jb), + static_cast<TypeParam>(1e-5)) + << "v failure at block " << jb << ", level " << jk << ", index " << jc; + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +// +// ! rbf_vec_interpol_edge +// +//////////////////////////////////////////////////////////////////////////////// + +TYPED_TEST(RbfInterpolSingleParamTest, EdgeSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int rbf_vec_dim_e = this->rbf_vec_dim_e; + + // Define indexing helpers + const auto &vn_at = at<nproma, nlev, nblks_e>; + const auto &idx_at = at<rbf_vec_dim_e, nproma, nblks_e>; + const auto &blk_at = at<rbf_vec_dim_e, nproma, nblks_e>; + const auto &coeff_at = at<rbf_vec_dim_e, nproma, nblks_e>; + const auto &vt_at = at<nproma, nlev, nblks_e>; + + // Create host mirror views + auto p_vn_in_h = Kokkos::create_mirror_view(this->p_vn_in); + auto rbf_vec_idx_e_h = Kokkos::create_mirror_view(this->rbf_vec_idx_e); + auto rbf_vec_blk_e_h = Kokkos::create_mirror_view(this->rbf_vec_blk_e); + auto rbf_vec_coeff_e_h = Kokkos::create_mirror_view(this->rbf_vec_coeff_e); + auto p_vt_out_h = Kokkos::create_mirror_view(this->p_vt_out); + + // Initialize with index-based pattern for edge data + for 
(int ib = 0; ib < nblks_e; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + // Value depends on all three indices + p_vn_in_h[vn_at(ic, ik, ib)] = static_cast<TypeParam>(1.0 + ic + ik * 0.1 + ib * 0.01); + } + } + } + + // Initialize edge connectivity indices with specific pattern + for (int ib = 0; ib < nblks_e; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + // Each edge uses rbf_vec_dim_e neighboring edges + for (int j = 0; j < rbf_vec_dim_e; ++j) { + // Edge indices with a pattern + rbf_vec_idx_e_h[idx_at(j, ic, ib)] = (ic + j) % nproma; + rbf_vec_blk_e_h[blk_at(j, ic, ib)] = (ib + j % nblks_e) % nblks_e; + + // Interpolation coefficients that depend on indices + rbf_vec_coeff_e_h[coeff_at(j, ic, ib)] = static_cast<TypeParam>(1.0 + ib + 0.1*ic + 0.01 * j); // coefficient + } + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + p_vt_out_h[vt_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + } + } + } + + // Copy to device + Kokkos::deep_copy(this->p_vn_in, p_vn_in_h); + Kokkos::deep_copy(this->rbf_vec_idx_e, rbf_vec_idx_e_h); + Kokkos::deep_copy(this->rbf_vec_blk_e, rbf_vec_blk_e_h); + Kokkos::deep_copy(this->rbf_vec_coeff_e, rbf_vec_coeff_e_h); + Kokkos::deep_copy(this->p_vt_out, p_vt_out_h); + + Kokkos::fence(); + + // Call the function + rbf_vec_interpol_edge_lib<TypeParam>( + this->p_vn_in.data(), this->rbf_vec_idx_e.data(), + this->rbf_vec_blk_e.data(), this->rbf_vec_coeff_e.data(), + this->p_vt_out.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev, this->elev, + nlev, nproma, rbf_vec_dim_e, nblks_e, this->lacc, this->acc_async); + + Kokkos::fence(); + + // Copy results back to host + Kokkos::deep_copy(p_vt_out_h, this->p_vt_out); + + // Expected results based on the specific test values + std::vector<TypeParam> expected_vt(24); + int idx = 0; + std::generate(expected_vt.begin(), expected_vt.end(), [&idx]() { + TypeParam values[] = { + 7.1304, 8.9324, 
10.9644, 7.5364, 9.3784, 11.4504, + 7.9424, 9.8244, 11.9364, 8.3484, 10.2704, 12.4224, + 14.1502, 16.9522, 19.9842, 14.9562, 17.7982, 20.8702, + 15.7622, 18.6442, 21.7562, 16.5682, 19.4902, 22.6422, + }; + return values[idx++]; + }); + + // Verify results + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int je = i_startidx; je <= i_endidx; ++je) { + EXPECT_NEAR(p_vt_out_h[vt_at(je, jk, jb)], + expected_vt[vt_at(je, jk, jb)], + static_cast<TypeParam>(1e-5)) + << "Tangential velocity failure at block " << jb << ", level " << jk << ", index " << je; + } + } + } +} + +TYPED_TEST(RbfInterpolSingleParamTest, EdgeRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int rbf_vec_dim_e = this->rbf_vec_dim_e; + + // Define indexing helpers + const auto &vn_at = at<nproma, nlev, nblks_e>; + const auto &idx_at = at<rbf_vec_dim_e, nproma, nblks_e>; + const auto &blk_at = at<rbf_vec_dim_e, nproma, nblks_e>; + const auto &coeff_at = at<rbf_vec_dim_e, nproma, nblks_e>; + const auto &vt_at = at<nproma, nlev, nblks_e>; + + // Create host mirror views + auto p_vn_in_h = Kokkos::create_mirror_view(this->p_vn_in); + auto rbf_vec_idx_e_h = Kokkos::create_mirror_view(this->rbf_vec_idx_e); + auto rbf_vec_blk_e_h = Kokkos::create_mirror_view(this->rbf_vec_blk_e); + auto rbf_vec_coeff_e_h = Kokkos::create_mirror_view(this->rbf_vec_coeff_e); + auto p_vt_out_h = Kokkos::create_mirror_view(this->p_vt_out); + + // Use fixed seed for reproducibility + std::mt19937 gen(44); // Different seed from other tests + std::uniform_int_distribution<int> edge_distrib(0, nproma - 1); + std::uniform_int_distribution<int> block_distrib(0, nblks_e - 1); + std::uniform_real_distribution<double> 
real_distrib(0.01, 1.0); + std::uniform_real_distribution<double> coeff_distrib(0.01, 0.5); + + // Initialize with random values + for (int ib = 0; ib < nblks_e; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + p_vn_in_h[vn_at(ic, ik, ib)] = static_cast<TypeParam>(real_distrib(gen)); + } + } + } + + // Initialize edge connectivity indices with random values + for (int ib = 0; ib < nblks_e; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + + for (int j = 0; j < rbf_vec_dim_e; ++j) { + // Random edge indices and blocks + rbf_vec_idx_e_h[idx_at(j, ic, ib)] = edge_distrib(gen); + rbf_vec_blk_e_h[blk_at(j, ic, ib)] = block_distrib(gen); + // Random coefficients for interpolation + rbf_vec_coeff_e_h[coeff_at(j, ic, ib)] = static_cast<TypeParam>(coeff_distrib(gen)); + } + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + p_vt_out_h[vt_at(ic, ik, ib)] = static_cast<TypeParam>(0.0); + } + } + } + + // Copy to device + Kokkos::deep_copy(this->p_vn_in, p_vn_in_h); + Kokkos::deep_copy(this->rbf_vec_idx_e, rbf_vec_idx_e_h); + Kokkos::deep_copy(this->rbf_vec_blk_e, rbf_vec_blk_e_h); + Kokkos::deep_copy(this->rbf_vec_coeff_e, rbf_vec_coeff_e_h); + Kokkos::deep_copy(this->p_vt_out, p_vt_out_h); + + Kokkos::fence(); + + // Call the function + rbf_vec_interpol_edge_lib<TypeParam>( this->p_vn_in.data(), this->rbf_vec_idx_e.data(), this->rbf_vec_blk_e.data(), this->rbf_vec_coeff_e.data(), this->p_vt_out.data(), this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, this->elev, - this->nlev, this->nproma, this->rbf_vec_dim_e, this->nblks_e, this->lacc, - this->acc_async); - - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - for (int jk = 0; jk < this->nlev; ++jk) { - for (int i = 0; i < this->nproma; ++i) { - size_t idx = i + static_cast<size_t>(jk) * this->nproma + - static_cast<size_t>(jb) * this->nproma * this->nlev; - EXPECT_NEAR(this->p_vt_out[idx], 
static_cast<T>(this->rbf_vec_dim_e), - static_cast<T>(1e-5)) - << "p_vt_out failure at block " << jb << ", level " << jk - << ", index " << i; + nlev, nproma, rbf_vec_dim_e, nblks_e, this->lacc, this->acc_async); + + Kokkos::fence(); + + // Copy results back to host + Kokkos::deep_copy(p_vt_out_h, this->p_vt_out); + + // Calculate expected values + using host_space = Kokkos::HostSpace; + Kokkos::View<TypeParam***, host_space> expected_vt("expected_vt", nproma, nlev, nblks_e); + + // Compute expected values + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int je = i_startidx; je <= i_endidx; ++je) { + expected_vt(je, jk, jb) = static_cast<TypeParam>(0.0); + + for (int j = 0; j < rbf_vec_dim_e; ++j) { + int edge_idx = rbf_vec_idx_e_h[idx_at(j, je, jb)]; + int edge_blk = rbf_vec_blk_e_h[blk_at(j, je, jb)]; + TypeParam coeff = rbf_vec_coeff_e_h[coeff_at(j, je, jb)]; + + expected_vt(je, jk, jb) += + coeff * p_vn_in_h[vn_at(edge_idx, jk, edge_blk)]; + } + } + } + } + + // Verify results + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int je = i_startidx; je <= i_endidx; ++je) { + EXPECT_NEAR(p_vt_out_h[vt_at(je, jk, jb)], + expected_vt(je, jk, jb), + static_cast<TypeParam>(1e-5)) + << "Tangential velocity failure at block " << jb << ", level " << jk << ", index " << je; } } } } -// Define a typed test fixture for the functions which have different input and -// output types template <typename TypePair> -class RbfVecInterpolMixedTestFixture : public ::testing::Test, +class RbfVecInterpolDoubleParamTest : 
public ::testing::Test, public interp_dimensions { public: using InType = typename TypePair::in_type; using OutType = typename TypePair::out_type; - // Constant dimensions. - static constexpr int nproma = 3; // inner loop length - static constexpr int nlev = 4; // number of vertical levels - static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) - static constexpr int nblks_v = - 2; // number of vertex blocks (for rbf arrays and outputs) - static constexpr int rbf_vec_dim = - 6; // fixed dimension for rbf vector (stencil points) + // Using Kokkos execution and memory spaces + using exec_space = Kokkos::DefaultExecutionSpace; + using memory_space = exec_space::memory_space; + + // Kokkos Views for test data + Kokkos::View<InType*, memory_space> p_e_in; // Dimensions: (nproma, nlev, nblks_e) + Kokkos::View<int*, memory_space> rbf_vec_idx_v; // Dimensions: (rbf_vec_dim_v, nproma, nblks_v) + Kokkos::View<int*, memory_space> rbf_vec_blk_v; // Dimensions: (rbf_vec_dim_v, nproma, nblks_v) + Kokkos::View<InType*, memory_space> rbf_vec_coeff_v; // Dimensions: (rbf_vec_dim_v, 2, nproma, nblks_v) + Kokkos::View<OutType*, memory_space> p_u_out; // Dimensions: (nproma, nlev, nblks_v) + Kokkos::View<OutType*, memory_space> p_v_out; // Dimensions: (nproma, nlev, nblks_v) - // Parameter values. - int i_startblk = 0; - int i_endblk = 1; // Test blocks [0, 1] - int i_startidx_in = 0; - int i_endidx_in = nproma - 1; // Full range: 0 .. nproma-1 - int slev = 0; - int elev = nlev - 1; // Full vertical range (0 .. nlev-1) - bool lacc = false; // Not using ACC-specific behavior. - bool acc_async = false; // No asynchronous execution. - - // Arrays stored in std::vector. 
- std::vector<InType> p_e_in; // Dimensions: (nproma, nlev, nblks_e) - std::vector<int> rbf_vec_idx_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) - std::vector<int> rbf_vec_blk_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) - std::vector<InType> - rbf_vec_coeff_v; // Dimensions: (rbf_vec_dim, 2, nproma, nblks_v) - std::vector<OutType> p_u_out; // Dimensions: (nproma, nlev, nblks_v) - std::vector<OutType> p_v_out; // Dimensions: (nproma, nlev, nblks_v) - - RbfVecInterpolMixedTestFixture() { - // Allocate and initialize inputs. - p_e_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), - static_cast<InType>(1)); - rbf_vec_idx_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 1); - rbf_vec_blk_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 0); - rbf_vec_coeff_v.resize( - num_elements_4d<InType>(rbf_vec_dim, 2, nproma, nblks_v), - static_cast<InType>(1)); - - // Allocate output arrays and initialize to zero. - p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), - static_cast<OutType>(0)); - p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), - static_cast<OutType>(0)); - } + RbfVecInterpolDoubleParamTest() + : p_e_in("p_e_in", nproma * nlev * nblks_e), + rbf_vec_idx_v("rbf_vec_idx_v", rbf_vec_dim_v * nproma * nblks_v), + rbf_vec_blk_v("rbf_vec_blk_v", rbf_vec_dim_v * nproma * nblks_v), + rbf_vec_coeff_v("rbf_vec_coeff_v", rbf_vec_dim_v * 2 * nproma * nblks_v), + p_u_out("p_u_out", nproma * nlev * nblks_v), + p_v_out("p_v_out", nproma * nlev * nblks_v) + {} }; -TYPED_TEST_SUITE(RbfVecInterpolMixedTestFixture, MixedTypes); +TYPED_TEST_SUITE(RbfVecInterpolDoubleParamTest, MixedTypes); + +//////////////////////////////////////////////////////////////////////////////// +// +// ! 
rbf_vec_interpol_vertex +// +//////////////////////////////////////////////////////////////////////////////// + +TYPED_TEST(RbfVecInterpolDoubleParamTest, VertexSpecific) { + using InType = typename TestFixture::InType; + using OutType = typename TestFixture::OutType; + + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + constexpr int rbf_vec_dim_v = this->rbf_vec_dim_v; + + // Define indexing helpers + const auto &edge_at = at<nproma, nlev, nblks_e>; + const auto &idx_at = at<rbf_vec_dim_v, nproma, nblks_v>; + const auto &blk_at = at<rbf_vec_dim_v, nproma, nblks_v>; + const auto &coeff_at = at<rbf_vec_dim_v, 2, nproma, nblks_v>; + const auto &vert_at = at<nproma, nlev, nblks_v>; + + // Create host mirror views + auto p_e_in_h = Kokkos::create_mirror_view(this->p_e_in); + auto rbf_vec_idx_v_h = Kokkos::create_mirror_view(this->rbf_vec_idx_v); + auto rbf_vec_blk_v_h = Kokkos::create_mirror_view(this->rbf_vec_blk_v); + auto rbf_vec_coeff_v_h = Kokkos::create_mirror_view(this->rbf_vec_coeff_v); + auto p_u_out_h = Kokkos::create_mirror_view(this->p_u_out); + auto p_v_out_h = Kokkos::create_mirror_view(this->p_v_out); + + // Initialize with index-based pattern for edge data + for (int ib = 0; ib < nblks_e; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + // Value depends on all three indices + p_e_in_h[edge_at(ic, ik, ib)] = static_cast<InType>(1.0 + ic + ik * 0.1 + ib * 0.01); + } + } + } + + // Initialize vertex connectivity indices with specific pattern + for (int ib = 0; ib < nblks_v; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + // Each vertex connects to 6 edges + for (int j = 0; j < rbf_vec_dim_v; ++j) { + // Edge indices with a pattern + rbf_vec_idx_v_h[idx_at(j, ic, ib)] = (ic + j) % nproma; + rbf_vec_blk_v_h[blk_at(j, ic, ib)] = (ib + j % nblks_e) % nblks_e; + + // Interpolation coefficients that depend on indices 
+ rbf_vec_coeff_v_h[coeff_at(j, 0, ic, ib)] = static_cast<InType>(1.0 + ib + 0.1*ic + 0.01 * j); // x coefficient + rbf_vec_coeff_v_h[coeff_at(j, 1, ic, ib)] = static_cast<InType>(2.0 + ib + 0.1*ic + 0.01 * j); // y coefficient + } + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + p_u_out_h[vert_at(ic, ik, ib)] = static_cast<OutType>(0.0); + p_v_out_h[vert_at(ic, ik, ib)] = static_cast<OutType>(0.0); + } + } + } + + // Copy to device + Kokkos::deep_copy(this->p_e_in, p_e_in_h); + Kokkos::deep_copy(this->rbf_vec_idx_v, rbf_vec_idx_v_h); + Kokkos::deep_copy(this->rbf_vec_blk_v, rbf_vec_blk_v_h); + Kokkos::deep_copy(this->rbf_vec_coeff_v, rbf_vec_coeff_v_h); + Kokkos::deep_copy(this->p_u_out, p_u_out_h); + Kokkos::deep_copy(this->p_v_out, p_v_out_h); + + Kokkos::fence(); + + // Call the function + rbf_vec_interpol_vertex_lib<InType, OutType>( + this->p_e_in.data(), this->rbf_vec_idx_v.data(), + this->rbf_vec_blk_v.data(), this->rbf_vec_coeff_v.data(), + this->p_u_out.data(), this->p_v_out.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, nproma, this->lacc, this->acc_async, nlev, + nblks_e, nblks_v); + + Kokkos::fence(); -TYPED_TEST(RbfVecInterpolMixedTestFixture, Vertex) { + // Copy results back to host + Kokkos::deep_copy(p_u_out_h, this->p_u_out); + Kokkos::deep_copy(p_v_out_h, this->p_v_out); + + // Expected results based on the specific test values + std::vector<OutType> expected_u(24); + std::vector<OutType> expected_v(24); + int idx = 0; + std::generate(expected_u.begin(), expected_u.end(), [&idx]() { + OutType values[] = { + 12.3709, 13.5139, 14.7169, 12.9859, 14.1889, 15.4519, + 13.6009, 14.8639, 16.1869, 14.2159, 15.5389, 16.9219, + 24.4006, 25.5436, 26.7466, 25.6156, 26.8186, 28.0816, + 26.8306, 28.0936, 29.4166, 28.0456, 29.3686, 30.7516 + }; + return values[idx++]; + }); + idx = 0; + std::generate(expected_v.begin(), expected_v.end(), [&idx]() { + OutType values[] = { + 
24.4009, 25.5439, 26.7469, 25.6159, 26.8189, 28.0819, + 26.8309, 28.0939, 29.4169, 28.0459, 29.3689, 30.7519, + 36.4306, 37.5736, 38.7766, 38.2456, 39.4486, 40.7116, + 40.0606, 41.3236, 42.6466, 41.8756, 43.1986, 44.5816 + }; + return values[idx++]; + }); + + // Verify results + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jv = i_startidx; jv <= i_endidx; ++jv) { + EXPECT_NEAR(p_u_out_h[vert_at(jv, jk, jb)], + expected_u[vert_at(jv, jk, jb)], + static_cast<OutType>(1e-5)) + << "u failure at block " << jb << ", level " << jk << ", index " << jv; + EXPECT_NEAR(p_v_out_h[vert_at(jv, jk, jb)], + expected_v[vert_at(jv, jk, jb)], + static_cast<OutType>(1e-5)) + << "v failure at block " << jb << ", level " << jk << ", index " << jv; + } + } + } +} + +TYPED_TEST(RbfVecInterpolDoubleParamTest, VertexRandom) { using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; + + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + constexpr int rbf_vec_dim_v = 6; // Fixed dimension for RBF + + // Define indexing helpers + const auto &edge_at = at<nproma, nlev, nblks_e>; + const auto &idx_at = at<rbf_vec_dim_v, nproma, nblks_v>; + const auto &blk_at = at<rbf_vec_dim_v, nproma, nblks_v>; + const auto &coeff_at = at<rbf_vec_dim_v, 2, nproma, nblks_v>; + const auto &vert_at = at<nproma, nlev, nblks_v>; - // Call the function with mixed precision. 
+ // Create host mirror views + auto p_e_in_h = Kokkos::create_mirror_view(this->p_e_in); + auto rbf_vec_idx_v_h = Kokkos::create_mirror_view(this->rbf_vec_idx_v); + auto rbf_vec_blk_v_h = Kokkos::create_mirror_view(this->rbf_vec_blk_v); + auto rbf_vec_coeff_v_h = Kokkos::create_mirror_view(this->rbf_vec_coeff_v); + auto p_u_out_h = Kokkos::create_mirror_view(this->p_u_out); + auto p_v_out_h = Kokkos::create_mirror_view(this->p_v_out); + + // Use fixed seed for reproducibility + std::mt19937 gen(42); + std::uniform_int_distribution<int> edge_distrib(0, nproma - 1); + std::uniform_int_distribution<int> block_distrib(0, nblks_e - 1); + std::uniform_real_distribution<double> real_distrib(0.01, 1.0); + + // Initialize with random values + for (int ib = 0; ib < nblks_e; ++ib) { + for (int ik = 0; ik < nlev; ++ik) { + for (int ic = 0; ic < nproma; ++ic) { + p_e_in_h[edge_at(ic, ik, ib)] = static_cast<InType>(real_distrib(gen)); + } + } + } + + // Initialize vertex connectivity indices with random values + for (int ib = 0; ib < nblks_v; ++ib) { + for (int ic = 0; ic < nproma; ++ic) { + + for (int j = 0; j < rbf_vec_dim_v; ++j) { + // Random edge indices and blocks + rbf_vec_idx_v_h[idx_at(j, ic, ib)] = edge_distrib(gen); + rbf_vec_blk_v_h[blk_at(j, ic, ib)] = block_distrib(gen); + // Random coefficients for interpolation + rbf_vec_coeff_v_h[coeff_at(j, 0, ic, ib)] = static_cast<InType>(real_distrib(gen)); + rbf_vec_coeff_v_h[coeff_at(j, 1, ic, ib)] = static_cast<InType>(real_distrib(gen)); + } + + // Initialize output to zero + for (int ik = 0; ik < nlev; ++ik) { + p_u_out_h[vert_at(ic, ik, ib)] = static_cast<OutType>(0.0); + p_v_out_h[vert_at(ic, ik, ib)] = static_cast<OutType>(0.0); + } + } + } + + // Copy to device + Kokkos::deep_copy(this->p_e_in, p_e_in_h); + Kokkos::deep_copy(this->rbf_vec_idx_v, rbf_vec_idx_v_h); + Kokkos::deep_copy(this->rbf_vec_blk_v, rbf_vec_blk_v_h); + Kokkos::deep_copy(this->rbf_vec_coeff_v, rbf_vec_coeff_v_h); + 
Kokkos::deep_copy(this->p_u_out, p_u_out_h); + Kokkos::deep_copy(this->p_v_out, p_v_out_h); + + Kokkos::fence(); + + // Call the function rbf_vec_interpol_vertex_lib<InType, OutType>( this->p_e_in.data(), this->rbf_vec_idx_v.data(), this->rbf_vec_blk_v.data(), this->rbf_vec_coeff_v.data(), this->p_u_out.data(), this->p_v_out.data(), this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->lacc, this->acc_async, this->nlev, - this->nblks_e, this->nblks_v); - - // Check the outputs only for blocks in the range [i_startblk, i_endblk]. - for (int block = this->i_startblk; block <= this->i_endblk; ++block) { - for (int level = 0; level < this->nlev; ++level) { - for (int i = 0; i < this->nproma; ++i) { - // Compute the linear index for a 3D array in column-major order: - size_t idx = - i + level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 6 stencil points, - // expect 6. - EXPECT_NEAR(this->p_u_out[idx], static_cast<OutType>(6), - static_cast<OutType>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " - << i; - EXPECT_NEAR(this->p_v_out[idx], static_cast<OutType>(6), - static_cast<OutType>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " - << i; + this->elev, nproma, this->lacc, this->acc_async, nlev, + nblks_e, nblks_v); + + Kokkos::fence(); + + // Copy results back to host + Kokkos::deep_copy(p_u_out_h, this->p_u_out); + Kokkos::deep_copy(p_v_out_h, this->p_v_out); + + // Calculate expected values + using host_space = Kokkos::HostSpace; + Kokkos::View<OutType***, host_space> expected_u("expected_u", nproma, nlev, nblks_v); + Kokkos::View<OutType***, host_space> expected_v("expected_v", nproma, nlev, nblks_v); + + // Compute expected values + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(this->i_startidx_in, this->i_endidx_in, 
nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jv = i_startidx; jv <= i_endidx; ++jv) { + expected_u(jv, jk, jb) = static_cast<OutType>(0.0); + expected_v(jv, jk, jb) = static_cast<OutType>(0.0); + + for (int j = 0; j < rbf_vec_dim_v; ++j) { + int edge_idx = rbf_vec_idx_v_h[idx_at(j, jv, jb)]; + int edge_blk = rbf_vec_blk_v_h[blk_at(j, jv, jb)]; + InType coeff_u = rbf_vec_coeff_v_h[coeff_at(j, 0, jv, jb)]; + InType coeff_v = rbf_vec_coeff_v_h[coeff_at(j, 1, jv, jb)]; + + expected_u(jv, jk, jb) += + static_cast<OutType>(coeff_u * p_e_in_h[edge_at(edge_idx, jk, edge_blk)]); + expected_v(jv, jk, jb) += + static_cast<OutType>(coeff_v * p_e_in_h[edge_at(edge_idx, jk, edge_blk)]); + } + } + } + } + + // Verify results + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jv = i_startidx; jv <= i_endidx; ++jv) { + EXPECT_NEAR(p_u_out_h[vert_at(jv, jk, jb)], + expected_u(jv, jk, jb), + static_cast<OutType>(1e-5)) + << "u failure at block " << jb << ", level " << jk << ", index " << jv; + EXPECT_NEAR(p_v_out_h[vert_at(jv, jk, jb)], + expected_v(jv, jk, jb), + static_cast<OutType>(1e-5)) + << "v failure at block " << jb << ", level " << jk << ", index " << jv; } } } -- GitLab From a116368e03fc0ce3d5025bbfa3c34c17f08588d3 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Wed, 9 Apr 2025 11:00:04 +0200 Subject: [PATCH 21/34] modified test_tdma_solver to enable the use them on GPU --- test/c/CMakeLists.txt | 2 +- test/c/test_tdma_solver.cpp | 349 +++++++++++++++++++++++++++++++----- 2 files changed, 308 insertions(+), 43 deletions(-) diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index 175b226..c93e30a 100644 --- 
a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -32,7 +32,7 @@ set(SOURCES test_horizontal_div.cpp test_horizontal_recon.cpp test_horizontal_rot.cpp - # test_tdma_solver.cpp + test_tdma_solver.cpp test_interpolation_vector.cpp test_intp_rbf.cpp test_interpolation_scalar.cpp diff --git a/test/c/test_tdma_solver.cpp b/test/c/test_tdma_solver.cpp index 4e09ff3..3a0165a 100644 --- a/test/c/test_tdma_solver.cpp +++ b/test/c/test_tdma_solver.cpp @@ -13,76 +13,341 @@ #include <vector> #include <algorithm> #include "mo_math_utilities.hpp" +#include "dim_helper.hpp" +#include <Kokkos_Core.hpp> +#include <random> -// Helper function to compute the 1D index for column-major storage. +// Helper function for column-major indexing +template <typename T> inline int idx(int i, int j, int nrows) { return i + j * nrows; } -// Test fixture for the TDMA solver tests. -class TDMASolverTestFixture : public ::testing::Test { +template <typename T> +class TDMASolverTypedTestFixture : public ::testing::Test { protected: - const int n = 10; // Matrix dimension. - std::vector<double> a; // Input matrix a. - std::vector<double> b; // Input matrix b. - std::vector<double> c; // Input matrix c. - std::vector<double> d; // Input matrix d. - std::vector<double> x; // Output matrix. - - TDMASolverTestFixture() - : a(n * n), b(n * n), c(n * n), d(n * n), x(n * n, 0.0) {} - - // SetUp is run before each test. - void SetUp() override { - // Fill arrays in column-major order. + const int n = 10; // Matrix dimension. + + // Using Kokkos execution and memory spaces + using exec_space = Kokkos::DefaultExecutionSpace; + using memory_space = exec_space::memory_space; + + // Kokkos Views for test data + Kokkos::View<T*, memory_space> a; // Input matrix a. + Kokkos::View<T*, memory_space> b; // Input matrix b. + Kokkos::View<T*, memory_space> c; // Input matrix c. + Kokkos::View<T*, memory_space> d; // Input matrix d. + Kokkos::View<T*, memory_space> x; // Output matrix. 
+ + TDMASolverTypedTestFixture() + : a("a", n * n), + b("b", n * n), + c("c", n * n), + d("d", n * n), + x("x", n * n) + {} + + void SetUpSpecificTest() { + // Create host mirror views + auto a_h = Kokkos::create_mirror_view(a); + auto b_h = Kokkos::create_mirror_view(b); + auto c_h = Kokkos::create_mirror_view(c); + auto d_h = Kokkos::create_mirror_view(d); + auto x_h = Kokkos::create_mirror_view(x); + + // Fill arrays in column-major order with the specific test values for (int j = 0; j < n; j++) { for (int i = 0; i < n; i++) { - double value = (i + 1) + (j + 1); - a[idx(i, j, n)] = 1.0 * value; - b[idx(i, j, n)] = 2.0 * value; - c[idx(i, j, n)] = 1.0 * value; - d[idx(i, j, n)] = 1.0 * value; + T value = static_cast<T>((i + 1) + (j + 1)); + a_h[idx<T>(i, j, n)] = static_cast<T>(1.0) * value; + b_h[idx<T>(i, j, n)] = static_cast<T>(2.0) * value; + c_h[idx<T>(i, j, n)] = static_cast<T>(1.0) * value; + d_h[idx<T>(i, j, n)] = static_cast<T>(1.0) * value; + x_h[idx<T>(i, j, n)] = static_cast<T>(0.0); } } - // Clear the output vector. 
- std::fill(x.begin(), x.end(), 0.0); + + // Copy to device + Kokkos::deep_copy(a, a_h); + Kokkos::deep_copy(b, b_h); + Kokkos::deep_copy(c, c_h); + Kokkos::deep_copy(d, d_h); + Kokkos::deep_copy(x, x_h); } }; -TEST_F(TDMASolverTestFixture, FullTest) { - // Call the solver over the full range: - tdma_solver_vec<double>(a.data(), b.data(), c.data(), d.data(), - 0, n, 0, n, n, n, x.data()); +// Define the types we want to test with +typedef ::testing::Types<float, double> NumericTypes; +TYPED_TEST_SUITE(TDMASolverTypedTestFixture, NumericTypes); + +// Specific test for the full matrix +TYPED_TEST(TDMASolverTypedTestFixture, SpecificFull) { + const int n = this->n; + + // Set up the test with specific values + this->SetUpSpecificTest(); + + // Call the solver over the full range + tdma_solver_vec<TypeParam>( + this->a.data(), this->b.data(), this->c.data(), this->d.data(), + 0, n, 0, n, n, n, this->x.data()); + + // Copy results back to host + auto x_h = Kokkos::create_mirror_view(this->x); + Kokkos::deep_copy(x_h, this->x); - // Compute the sum of all elements in the output matrix. - double sum = 0.0; + // Compute the sum of all elements in the output matrix + TypeParam sum = 0.0; for (int j = 0; j < n; j++) { for (int i = 0; i < n; i++) { - sum += x[idx(i, j, n)]; + sum += x_h[idx<TypeParam>(i, j, n)]; } } - // Expected reference sum - double sum_ref = 27.2727272727272769; - double tol = 1e-13; + // Expected reference sum (adjusted for precision) + TypeParam sum_ref = static_cast<TypeParam>(27.2727272727272769); + TypeParam tol = std::is_same<TypeParam, float>::value ? + static_cast<TypeParam>(1e-6) : static_cast<TypeParam>(1e-13); + EXPECT_NEAR(sum, sum_ref, tol); } -TEST_F(TDMASolverTestFixture, PartialTest) { - // Call the solver for a partial region: - // For C++: slev = 1, elev = n-1, startidx = 1, endidx = n-1. 
- tdma_solver_vec<double>(a.data(), b.data(), c.data(), d.data(), - 1, n - 1, 1, n - 1, n, n, x.data()); +// Specific test for a partial region +TYPED_TEST(TDMASolverTypedTestFixture, SpecificPartial) { + const int n = this->n; + + // Set up the test with specific values + this->SetUpSpecificTest(); + + // Call the solver for a partial region + tdma_solver_vec<TypeParam>( + this->a.data(), this->b.data(), this->c.data(), this->d.data(), + 1, n - 1, 1, n - 1, n, n, this->x.data()); + + // Copy results back to host + auto x_h = Kokkos::create_mirror_view(this->x); + Kokkos::deep_copy(x_h, this->x); // Compute the sum over a region - double sum = 0.0; + TypeParam sum = 0.0; for (int j = 1; j < n - 1; j++) { for (int i = 1; i < n - 1; i++) { - sum += x[idx(i, j, n)]; + sum += x_h[idx<TypeParam>(i, j, n)]; } } - double sum_ref = 17.7777777777777679; - double tol = 1e-13; + // Expected reference sum (adjusted for precision) + TypeParam sum_ref = static_cast<TypeParam>(17.7777777777777679); + TypeParam tol = std::is_same<TypeParam, float>::value ? 
+ static_cast<TypeParam>(1e-6) : static_cast<TypeParam>(1e-13); + EXPECT_NEAR(sum, sum_ref, tol); } + +// Random test for the full matrix +TYPED_TEST(TDMASolverTypedTestFixture, RandomFull) { + const int n = this->n; + + // Create host mirror views + auto a_h = Kokkos::create_mirror_view(this->a); + auto b_h = Kokkos::create_mirror_view(this->b); + auto c_h = Kokkos::create_mirror_view(this->c); + auto d_h = Kokkos::create_mirror_view(this->d); + auto x_h = Kokkos::create_mirror_view(this->x); + + // Use fixed seed for reproducibility + std::mt19937 gen(42); + std::uniform_real_distribution<double> diag_dist(5.0, 10.0); // For main diagonal + std::uniform_real_distribution<double> off_diag_dist(0.1, 2.0); // For off-diagonals + std::uniform_real_distribution<double> rhs_dist(-10.0, 10.0); // For right-hand side + + // Fill arrays with random values + for (int j = 0; j < n; j++) { + for (int i = 0; i < n; i++) { + a_h[idx<TypeParam>(i, j, n)] = static_cast<TypeParam>(-off_diag_dist(gen)); + b_h[idx<TypeParam>(i, j, n)] = static_cast<TypeParam>(diag_dist(gen)); + c_h[idx<TypeParam>(i, j, n)] = static_cast<TypeParam>(-off_diag_dist(gen)); + d_h[idx<TypeParam>(i, j, n)] = static_cast<TypeParam>(rhs_dist(gen)); + x_h[idx<TypeParam>(i, j, n)] = static_cast<TypeParam>(0.0); + } + } + + // Save copies for reference solution + std::vector<TypeParam> a_copy(n * n); + std::vector<TypeParam> b_copy(n * n); + std::vector<TypeParam> c_copy(n * n); + std::vector<TypeParam> d_copy(n * n); + std::vector<TypeParam> x_expected(n * n, 0.0); + + for (int j = 0; j < n; j++) { + for (int i = 0; i < n; i++) { + a_copy[idx<TypeParam>(i, j, n)] = a_h[idx<TypeParam>(i, j, n)]; + b_copy[idx<TypeParam>(i, j, n)] = b_h[idx<TypeParam>(i, j, n)]; + c_copy[idx<TypeParam>(i, j, n)] = c_h[idx<TypeParam>(i, j, n)]; + d_copy[idx<TypeParam>(i, j, n)] = d_h[idx<TypeParam>(i, j, n)]; + } + } + + // Copy to device + Kokkos::deep_copy(this->a, a_h); + Kokkos::deep_copy(this->b, b_h); + 
Kokkos::deep_copy(this->c, c_h); + Kokkos::deep_copy(this->d, d_h); + Kokkos::deep_copy(this->x, x_h); + + // Call the solver over the full range + tdma_solver_vec<TypeParam>( + this->a.data(), this->b.data(), this->c.data(), this->d.data(), + 0, n, 0, n, n, n, this->x.data()); + + // Copy results back to host + Kokkos::deep_copy(x_h, this->x); + + // Calculate reference solution + for (int i = 0; i < n; i++) { + // Arrays for internal calculations + std::vector<TypeParam> c_prime(n, 0.0); + std::vector<TypeParam> d_prime(n, 0.0); + + // Forward sweep + c_prime[0] = c_copy[idx<TypeParam>(i, 0, n)] / b_copy[idx<TypeParam>(i, 0, n)]; + d_prime[0] = d_copy[idx<TypeParam>(i, 0, n)] / b_copy[idx<TypeParam>(i, 0, n)]; + + for (int j = 1; j < n; j++) { + TypeParam m = static_cast<TypeParam>(1.0) / + (b_copy[idx<TypeParam>(i, j, n)] - c_prime[j-1] * a_copy[idx<TypeParam>(i, j, n)]); + c_prime[j] = c_copy[idx<TypeParam>(i, j, n)] * m; + d_prime[j] = (d_copy[idx<TypeParam>(i, j, n)] - d_prime[j-1] * a_copy[idx<TypeParam>(i, j, n)]) * m; + } + + // Back substitution + x_expected[idx<TypeParam>(i, n-1, n)] = d_prime[n-1]; + + for (int j = n-2; j >= 0; j--) { + x_expected[idx<TypeParam>(i, j, n)] = d_prime[j] - c_prime[j] * x_expected[idx<TypeParam>(i, j+1, n)]; + } + } + + // Set tolerance based on type + TypeParam tol = std::is_same<TypeParam, float>::value ? 
+ static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-10); + + // Verify that individual values match + for (int j = 0; j < n; j++) { + for (int i = 0; i < n; i++) { + EXPECT_NEAR(x_h[idx<TypeParam>(i, j, n)], x_expected[idx<TypeParam>(i, j, n)], tol) + << "Mismatch at i=" << i << ", j=" << j; + } + } +} + +// Random test for a partial region +TYPED_TEST(TDMASolverTypedTestFixture, RandomPartial) { + const int n = this->n; + const int slev = 1; + const int elev = n - 1; + const int startidx = 1; + const int endidx = n - 1; + + // Create host mirror views + auto a_h = Kokkos::create_mirror_view(this->a); + auto b_h = Kokkos::create_mirror_view(this->b); + auto c_h = Kokkos::create_mirror_view(this->c); + auto d_h = Kokkos::create_mirror_view(this->d); + auto x_h = Kokkos::create_mirror_view(this->x); + + // Use fixed seed for reproducibility + std::mt19937 gen(43); + std::uniform_real_distribution<double> diag_dist(5.0, 10.0); + std::uniform_real_distribution<double> off_diag_dist(0.1, 2.0); + std::uniform_real_distribution<double> rhs_dist(-10.0, 10.0); + + // Initialize all values to something that shouldn't be touched + for (int j = 0; j < n; j++) { + for (int i = 0; i < n; i++) { + a_h[idx<TypeParam>(i, j, n)] = static_cast<TypeParam>(-999.0); + b_h[idx<TypeParam>(i, j, n)] = static_cast<TypeParam>(-999.0); + c_h[idx<TypeParam>(i, j, n)] = static_cast<TypeParam>(-999.0); + d_h[idx<TypeParam>(i, j, n)] = static_cast<TypeParam>(-999.0); + x_h[idx<TypeParam>(i, j, n)] = static_cast<TypeParam>(0.0); + } + } + + // Set random values only for the region to be processed + for (int j = slev; j < elev; j++) { + for (int i = startidx; i < endidx; i++) { + a_h[idx<TypeParam>(i, j, n)] = static_cast<TypeParam>(-off_diag_dist(gen)); + b_h[idx<TypeParam>(i, j, n)] = static_cast<TypeParam>(diag_dist(gen)); + c_h[idx<TypeParam>(i, j, n)] = static_cast<TypeParam>(-off_diag_dist(gen)); + d_h[idx<TypeParam>(i, j, n)] = static_cast<TypeParam>(rhs_dist(gen)); + } + } + + // 
Save copies for reference solution + std::vector<TypeParam> a_copy(n * n, static_cast<TypeParam>(-999.0)); + std::vector<TypeParam> b_copy(n * n, static_cast<TypeParam>(-999.0)); + std::vector<TypeParam> c_copy(n * n, static_cast<TypeParam>(-999.0)); + std::vector<TypeParam> d_copy(n * n, static_cast<TypeParam>(-999.0)); + std::vector<TypeParam> x_expected(n * n, static_cast<TypeParam>(0.0)); + + for (int j = slev; j < elev; j++) { + for (int i = startidx; i < endidx; i++) { + a_copy[idx<TypeParam>(i, j, n)] = a_h[idx<TypeParam>(i, j, n)]; + b_copy[idx<TypeParam>(i, j, n)] = b_h[idx<TypeParam>(i, j, n)]; + c_copy[idx<TypeParam>(i, j, n)] = c_h[idx<TypeParam>(i, j, n)]; + d_copy[idx<TypeParam>(i, j, n)] = d_h[idx<TypeParam>(i, j, n)]; + } + } + + // Copy to device + Kokkos::deep_copy(this->a, a_h); + Kokkos::deep_copy(this->b, b_h); + Kokkos::deep_copy(this->c, c_h); + Kokkos::deep_copy(this->d, d_h); + Kokkos::deep_copy(this->x, x_h); + + // Call the solver for the partial region + tdma_solver_vec<TypeParam>( + this->a.data(), this->b.data(), this->c.data(), this->d.data(), + slev, elev, startidx, endidx, n, n, this->x.data()); + + // Copy results back to host + Kokkos::deep_copy(x_h, this->x); + + // Calculate reference solution for the partial region + for (int i = startidx; i < endidx; i++) { + // Arrays for internal calculations + std::vector<TypeParam> c_prime(n, 0.0); + std::vector<TypeParam> d_prime(n, 0.0); + + // Forward sweep + c_prime[slev] = c_copy[idx<TypeParam>(i, slev, n)] / b_copy[idx<TypeParam>(i, slev, n)]; + d_prime[slev] = d_copy[idx<TypeParam>(i, slev, n)] / b_copy[idx<TypeParam>(i, slev, n)]; + + for (int j = slev + 1; j < elev; j++) { + TypeParam m = static_cast<TypeParam>(1.0) / + (b_copy[idx<TypeParam>(i, j, n)] - c_prime[j-1] * a_copy[idx<TypeParam>(i, j, n)]); + c_prime[j] = c_copy[idx<TypeParam>(i, j, n)] * m; + d_prime[j] = (d_copy[idx<TypeParam>(i, j, n)] - d_prime[j-1] * a_copy[idx<TypeParam>(i, j, n)]) * m; + } + + // Back 
substitution + x_expected[idx<TypeParam>(i, elev-1, n)] = d_prime[elev-1]; + + for (int j = elev-2; j >= slev; j--) { + x_expected[idx<TypeParam>(i, j, n)] = d_prime[j] - c_prime[j] * x_expected[idx<TypeParam>(i, j+1, n)]; + } + } + + // Set tolerance based on type + TypeParam tol = std::is_same<TypeParam, float>::value ? + static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-10); + + // Verify that individual values match + for (int j = slev; j < elev; j++) { + for (int i = startidx; i < endidx; i++) { + EXPECT_NEAR(x_h[idx<TypeParam>(i, j, n)], x_expected[idx<TypeParam>(i, j, n)], tol) + << "Mismatch at i=" << i << ", j=" << j; + } + } +} -- GitLab From febab9e1b759a43249d1f46b80030bb86d983c25 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Wed, 9 Apr 2025 11:06:39 +0200 Subject: [PATCH 22/34] fixed a small linting issue --- test/c/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index c93e30a..c0f7c59 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -26,7 +26,6 @@ message(CHECK_PASS "done") # Find Kokkos (or use your existing Kokkos installation) # find_package(Kokkos REQUIRED) - set(SOURCES main.cpp test_horizontal_div.cpp -- GitLab From 20f7e7ca5a349189ceaa2b28da7ea152094fa416 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Wed, 9 Apr 2025 11:26:57 +0200 Subject: [PATCH 23/34] changed the tolerance for float in test_tdma_solver changed few more tolerance values --- test/c/test_tdma_solver.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/c/test_tdma_solver.cpp b/test/c/test_tdma_solver.cpp index 3a0165a..bb1dddf 100644 --- a/test/c/test_tdma_solver.cpp +++ b/test/c/test_tdma_solver.cpp @@ -107,7 +107,7 @@ TYPED_TEST(TDMASolverTypedTestFixture, SpecificFull) { // Expected reference sum (adjusted for precision) TypeParam sum_ref = static_cast<TypeParam>(27.2727272727272769); TypeParam tol = 
std::is_same<TypeParam, float>::value ? - static_cast<TypeParam>(1e-6) : static_cast<TypeParam>(1e-13); + static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); EXPECT_NEAR(sum, sum_ref, tol); } @@ -139,7 +139,7 @@ TYPED_TEST(TDMASolverTypedTestFixture, SpecificPartial) { // Expected reference sum (adjusted for precision) TypeParam sum_ref = static_cast<TypeParam>(17.7777777777777679); TypeParam tol = std::is_same<TypeParam, float>::value ? - static_cast<TypeParam>(1e-6) : static_cast<TypeParam>(1e-13); + static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); EXPECT_NEAR(sum, sum_ref, tol); } @@ -230,7 +230,7 @@ TYPED_TEST(TDMASolverTypedTestFixture, RandomFull) { // Set tolerance based on type TypeParam tol = std::is_same<TypeParam, float>::value ? - static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-10); + static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); // Verify that individual values match for (int j = 0; j < n; j++) { @@ -341,7 +341,7 @@ TYPED_TEST(TDMASolverTypedTestFixture, RandomPartial) { // Set tolerance based on type TypeParam tol = std::is_same<TypeParam, float>::value ? 
- static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-10); + static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); // Verify that individual values match for (int j = slev; j < elev; j++) { -- GitLab From 5cee3e94f6ebd01998ee73afefb252b86a5f6913 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Wed, 9 Apr 2025 15:09:52 +0200 Subject: [PATCH 24/34] added type dependent tolerance for the tests which use random input arrays --- test/c/test_horizontal_div.cpp | 29 +++++++++---- test/c/test_horizontal_recon.cpp | 30 +++++++++++--- test/c/test_horizontal_rot.cpp | 10 ++++- test/c/test_interpolation_scalar.cpp | 61 ++++++++++++++++++++-------- test/c/test_interpolation_vector.cpp | 15 ++----- test/c/test_intp_rbf.cpp | 33 ++++++++------- 6 files changed, 120 insertions(+), 58 deletions(-) diff --git a/test/c/test_horizontal_div.cpp b/test/c/test_horizontal_div.cpp index cf5f320..06e6edd 100644 --- a/test/c/test_horizontal_div.cpp +++ b/test/c/test_horizontal_div.cpp @@ -238,11 +238,14 @@ TYPED_TEST(HorizontalDivTest, TestDiv3DRandom) { } } + TypeParam tol = std::is_same<TypeParam, float>::value ? + static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); + // Verify results for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { EXPECT_NEAR(div_vec_c_h[div_vec_c_at(i, k, 0)], - ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + ref_div_vec_c[div_vec_c_at(i, k, 0)], tol) << "Results differ at i=" << i << ", k=" << k; } } @@ -468,11 +471,14 @@ TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) { } } + TypeParam tol = std::is_same<TypeParam, float>::value ? 
+ static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); + // Verify results for first field for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { EXPECT_NEAR(div_vec_c_h[div_vec_c_at(i, k, 0)], - ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + ref_div_vec_c[div_vec_c_at(i, k, 0)], tol) << "First field results differ at i=" << i << ", k=" << k; } } @@ -481,7 +487,7 @@ TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) { for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { EXPECT_NEAR(f4dout_h[f4dout_at(i, k, 0, 0)], - ref_f4dout[f4dout_at(i, k, 0, 0)], 1e-5) + ref_f4dout[f4dout_at(i, k, 0, 0)], tol) << "Second field results differ at i=" << i << ", k=" << k; } } @@ -625,6 +631,9 @@ TYPED_TEST(HorizontalDivTest, TestDiv4DRandom) { // Copy results back to host for verification Kokkos::deep_copy(f4dout_h, this->f4dout); + TypeParam tol = std::is_same<TypeParam, float>::value ? + static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); + // Compute reference result and check for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { int i_startidx, i_endidx; @@ -642,7 +651,7 @@ TYPED_TEST(HorizontalDivTest, TestDiv4DRandom) { geofac_div_h[geofac_div_at(jc, je, jb)]; } - EXPECT_NEAR(f4dout_h[f4dout_at(jc, jk, jb, ji)], expected, 1e-5) + EXPECT_NEAR(f4dout_h[f4dout_at(jc, jk, jb, ji)], expected, tol) << "Random test fails at jc=" << jc << ", jk=" << jk << ", jb=" << jb << ", ji=" << ji; } @@ -998,15 +1007,18 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { } } + TypeParam tol = std::is_same<TypeParam, float>::value ? 
+ static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); + // Verify results for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { EXPECT_NEAR(div_vec_c_h[div_vec_c_at(i, k, 0)], - ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + ref_div_vec_c[div_vec_c_at(i, k, 0)], tol) << "div_vec_c results differ at i=" << i << ", k=" << k; EXPECT_NEAR(opt_out2_h[div_vec_c_at(i, k, 0)], - ref_opt_out2[div_vec_c_at(i, k, 0)], 1e-5) + ref_opt_out2[div_vec_c_at(i, k, 0)], tol) << "opt_out2 results differ at i=" << i << ", k=" << k; } } @@ -1332,12 +1344,15 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { } } + TypeParam tol = std::is_same<TypeParam, float>::value ? + static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); + // Verify results - only check div_vec_c since l2fields=false means opt_out2 // isn't updated for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { EXPECT_NEAR(div_vec_c_h[div_vec_c_at(i, k, 0)], - ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + ref_div_vec_c[div_vec_c_at(i, k, 0)], tol) << "div_vec_c results differ at i=" << i << ", k=" << k; } } diff --git a/test/c/test_horizontal_recon.cpp b/test/c/test_horizontal_recon.cpp index d8ea721..0264805 100644 --- a/test/c/test_horizontal_recon.cpp +++ b/test/c/test_horizontal_recon.cpp @@ -342,11 +342,14 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellRandom) { } } + TypeParam tol = std::is_same<TypeParam, float>::value ? + static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); + // Check result for (int i = 0; i < lsq_dim_unk + 1; ++i) { for (int jc = 0; jc < nproma; ++jc) { EXPECT_NEAR(p_coeff_h[(p_coeff_at(i, jc, 0, 0))], - p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], tol) << "For loop result fails for i = " << i << ", jc = " << jc; } } @@ -532,11 +535,14 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVDRandom) { } } + TypeParam tol = std::is_same<TypeParam, float>::value ? 
+ static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); + // Check result for (int i = 0; i < lsq_dim_unk + 1; ++i) { for (int jc = 0; jc < nproma; ++jc) { EXPECT_NEAR(p_coeff_h[(p_coeff_at(i, jc, 0, 0))], - p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], tol) << "For loop result fails for i = " << i << ", jc = " << jc; } } @@ -825,10 +831,13 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { } } + TypeParam tol = std::is_same<TypeParam, float>::value ? + static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); + // Check result for (int i = 0; i < lsq_dim_unk + 1; ++i) { for (int jc = 0; jc < nproma; ++jc) { - EXPECT_NEAR(p_coeff_h[p_coeff_at(i, jc, 0, 0)], p_result_h(i, jc), 1e-5) + EXPECT_NEAR(p_coeff_h[p_coeff_at(i, jc, 0, 0)], p_result_h(i, jc), tol) << "For loop result fails for i = " << i << ", jc = " << jc; } } @@ -1037,11 +1046,14 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { } } + TypeParam tol = std::is_same<TypeParam, float>::value ? + static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); + // Check result for (int j = 0; j < lsq_dim_unk + 1; ++j) { for (int jc = 0; jc < nproma; ++jc) { EXPECT_NEAR(p_coeff_h[(p_coeff_at(j, jc, 0, 0))], - p_result[(at<lsq_dim_unk + 1, nproma>(j, jc))], 1e-5) + p_result[(at<lsq_dim_unk + 1, nproma>(j, jc))], tol) << "For loop result fails for j = " << j << ", jc = " << jc; } } @@ -1306,11 +1318,14 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellRandom) { } } + TypeParam tol = std::is_same<TypeParam, float>::value ? 
+ static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); + // Check result for (int j = 0; j < lsq_dim_unk + 1; ++j) { for (int jc = 0; jc < nproma; ++jc) { EXPECT_NEAR(p_coeff_h[(p_coeff_at(j, jc, 0, 0))], - p_result[(at<lsq_dim_unk + 1, nproma>(j, jc))], 1e-5) + p_result[(at<lsq_dim_unk + 1, nproma>(j, jc))], tol) << "For loop result fails for j = " << j << ", jc = " << jc; } } @@ -1537,11 +1552,14 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { } } + TypeParam tol = std::is_same<TypeParam, float>::value ? + static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); + // Check result for (int i = 0; i < lsq_dim_unk + 1; ++i) { for (int jc = 0; jc < nproma; ++jc) { EXPECT_NEAR(p_coeff_h[(p_coeff_at(i, jc, 0, 0))], - p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], tol) << "For loop result fails for i = " << i << ", jc = " << jc; } } diff --git a/test/c/test_horizontal_rot.cpp b/test/c/test_horizontal_rot.cpp index 69e9d03..f93cddd 100644 --- a/test/c/test_horizontal_rot.cpp +++ b/test/c/test_horizontal_rot.cpp @@ -247,11 +247,14 @@ TYPED_TEST(HorizontalRotVertexTest, TestRotVertexAtmosRandom) { } } + TypeParam tol = std::is_same<TypeParam, float>::value ? + static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); + // Verify results for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { EXPECT_NEAR(rot_vec_h[rot_vec_at(i, k, 0)], - ref_rot_vec[rot_vec_at(i, k, 0)], 1e-5) + ref_rot_vec[rot_vec_at(i, k, 0)], tol) << "Results differ at i=" << i << ", k=" << k; } } @@ -436,11 +439,14 @@ TYPED_TEST(HorizontalRotVertexTest, TestRotVertexRIRandom) { } } + TypeParam tol = std::is_same<TypeParam, float>::value ? 
+ static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); + // Verify results for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { EXPECT_NEAR(rot_vec_h[rot_vec_at(i, k, 0)], - ref_rot_vec[rot_vec_at(i, k, 0)], 1e-5) + ref_rot_vec[rot_vec_at(i, k, 0)], tol) << "Results differ at i=" << i << ", k=" << k << ")"; } } diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp index 94c33dd..c3158ad 100644 --- a/test/c/test_interpolation_scalar.cpp +++ b/test/c/test_interpolation_scalar.cpp @@ -209,6 +209,16 @@ TYPED_TEST(InterpolationScalarSingleParamTest, Verts2EdgesSpecific) { // Copy results back to host Kokkos::deep_copy(p_edge_out_h, this->p_edge_out); + // print results in one line + std::cout << "p_edge_out_h: "; + for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { + for (int jk = this->slev; jk <= this->elev; ++jk) { + for (int jv = this->i_startidx; jv <= this->i_endidx; ++jv) { + std::cout << p_edge_out_h[edge_at(jv, jk, jb)] << " "; + } + } + } + std::cout << std::endl; // Expected results based on the specific test values std::vector<TypeParam> expected_edges(12); int idx = 0; @@ -330,13 +340,15 @@ TYPED_TEST(InterpolationScalarSingleParamTest, Verts2EdgesRandom) { } } + TypeParam tol = std::is_same<TypeParam, float>::value ? + static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); + // Verify results for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { for (int jk = this->slev; jk <= this->elev; ++jk) { for (int jv = this->i_startidx; jv <= this->i_endidx; ++jv) { EXPECT_NEAR(p_edge_out_h[edge_at(jv, jk, jb)], - expected_edges(jv, jk, jb), - static_cast<TypeParam>(1e-5)) + expected_edges(jv, jk, jb), tol) << "Failure at block " << jb << ", level " << jk << ", index " << jv; } } @@ -534,13 +546,15 @@ TYPED_TEST(InterpolationScalarSingleParamTest, Edges2VertsRandom) { } } + TypeParam tol = std::is_same<TypeParam, float>::value ? 
+ static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); + // Verify results for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { for (int jk = this->slev; jk <= this->elev; ++jk) { for (int jv = this->i_startidx; jv <= this->i_endidx; ++jv) { EXPECT_NEAR(p_vert_out_h[vert_at(jv, jk, jb)], - expected_verts(jv, jk, jb), - static_cast<TypeParam>(1e-5)) + expected_verts(jv, jk, jb), tol) << "Failure at block " << jb << ", level " << jk << ", index " << jv; } } @@ -738,13 +752,15 @@ TYPED_TEST(InterpolationScalarSingleParamTest, Edges2CellsRandom) { } } + TypeParam tol = std::is_same<TypeParam, float>::value ? + static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); + // Verify results for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { for (int jk = this->slev; jk <= this->elev; ++jk) { for (int jc = this->i_startidx; jc <= this->i_endidx; ++jc) { EXPECT_NEAR(p_cell_out_h[cell_at(jc, jk, jb)], - expected_cells(jc, jk, jb), - static_cast<TypeParam>(1e-5)) + expected_cells(jc, jk, jb), tol) << "Failure at block " << jb << ", level " << jk << ", index " << jc; } } @@ -947,14 +963,16 @@ TYPED_TEST(InterpolationScalarSingleParamTest, Verts2CellsRandom) { } } + TypeParam tol = std::is_same<TypeParam, float>::value ? + static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); + // Verify results for (int jb = 0; jb < nblks_c; ++jb) { for (int jk = this->slev; jk <= this->elev; ++jk) { int nlen = (jb != nblks_c - 1) ? nproma : npromz_c; for (int jc = 0; jc < nlen; ++jc) { EXPECT_NEAR(p_cell_out_h[cell_at(jc, jk, jb)], - expected_cells(jc, jk, jb), - static_cast<TypeParam>(1e-5)) + expected_cells(jc, jk, jb), tol) << "Failure at block " << jb << ", level " << jk << ", index " << jc; } } @@ -1160,13 +1178,15 @@ TYPED_TEST(InterpolationScalarSingleParamTest, CellAvgLibRandom) { } } + TypeParam tol = std::is_same<TypeParam, float>::value ? 
+ static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); + // Verify results for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { for (int jk = this->slev; jk <= this->elev; ++jk) { for (int jc = this->i_startidx; jc <= this->i_endidx; ++jc) { EXPECT_NEAR(avg_psi_c_h[avg_at(jc, jk, jb)], - expected_avg(jc, jk, jb), - static_cast<TypeParam>(1e-5)) + expected_avg(jc, jk, jb), tol) << "Failure at block " << jb << ", level " << jk << ", index " << jc; } } @@ -1452,6 +1472,9 @@ TYPED_TEST(InterpolationScalarDoubleParamTest, Cells2EdgesRandom) { } } + OutType tol = std::is_same<OutType, float>::value ? + static_cast<OutType>(1e-5) : static_cast<OutType>(1e-13); + // Verify results for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; @@ -1461,8 +1484,7 @@ TYPED_TEST(InterpolationScalarDoubleParamTest, Cells2EdgesRandom) { for (int jk = this->slev; jk <= this->elev; ++jk) { for (int je = i_startidx; je <= i_endidx; ++je) { EXPECT_NEAR(p_edge_out_h[edge_at(je, jk, jb)], - expected_edges(je, jk, jb), - static_cast<OutType>(1e-5)) + expected_edges(je, jk, jb), tol) << "Failure at block " << jb << ", level " << jk << ", index " << je; } } @@ -1677,6 +1699,9 @@ TYPED_TEST(InterpolationScalarDoubleParamTest, Cells2VertsRandom) { } } + OutType tol = std::is_same<OutType, float>::value ? 
+ static_cast<OutType>(1e-5) : static_cast<OutType>(1e-13); + // Verify results for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { int i_startidx, i_endidx; @@ -1686,8 +1711,7 @@ TYPED_TEST(InterpolationScalarDoubleParamTest, Cells2VertsRandom) { for (int jk = this->slev; jk <= this->elev; ++jk) { for (int jv = i_startidx; jv <= i_endidx; ++jv) { EXPECT_NEAR(p_vert_out_h[vert_at(jv, jk, jb)], - expected_verts(jv, jk, jb), - static_cast<OutType>(1e-5)) + expected_verts(jv, jk, jb), tol) << "Failure at block " << jb << ", level " << jk << ", index " << jv; } } @@ -1982,6 +2006,9 @@ TYPED_TEST(Cells2vertsriScalarLibTestFixture, Cells2VertsRIRandom) { } } + OutType tol = std::is_same<OutType, float>::value ? + static_cast<OutType>(1e-5) : static_cast<OutType>(1e-13); + // Verify results - using the appropriate indexing depending on __LOOP_EXCHANGE for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { int i_startidx, i_endidx; @@ -1992,13 +2019,11 @@ TYPED_TEST(Cells2vertsriScalarLibTestFixture, Cells2VertsRIRandom) { for (int jv = i_startidx; jv <= i_endidx; ++jv) { #ifdef __LOOP_EXCHANGE EXPECT_NEAR(p_vert_out_h[vert_at(jv, jk, jb)], - expected_verts(jv, jk, jb), - static_cast<OutType>(1e-5)) + expected_verts(jv, jk, jb), tol) << "Failure at block " << jb << ", level " << jk << ", index " << jv; #else EXPECT_NEAR(p_vert_out_h[vert_at(jk, jv, jb)], - expected_verts(jv, jk, jb), - static_cast<OutType>(1e-5)) + expected_verts(jv, jk, jb), tol) << "Failure at block " << jb << ", level " << jk << ", index " << jv; #endif } diff --git a/test/c/test_interpolation_vector.cpp b/test/c/test_interpolation_vector.cpp index 497b1d5..0806e35 100644 --- a/test/c/test_interpolation_vector.cpp +++ b/test/c/test_interpolation_vector.cpp @@ -139,8 +139,6 @@ TYPED_TEST(InterpolationVectorTest, Edges2CellsSpecific) { Kokkos::deep_copy(this->p_u_out, p_u_out_h); Kokkos::deep_copy(this->p_v_out, p_v_out_h); - Kokkos::fence(); - // Call the function 
edges2cells_vector_lib<TypeParam>( this->p_vn_in.data(), this->p_vt_in.data(), @@ -150,8 +148,6 @@ TYPED_TEST(InterpolationVectorTest, Edges2CellsSpecific) { this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, this->elev, nproma, nlev, nblks_e, nblks_c); - Kokkos::fence(); - // Copy results back to host Kokkos::deep_copy(p_u_out_h, this->p_u_out); Kokkos::deep_copy(p_v_out_h, this->p_v_out); @@ -304,8 +300,6 @@ TYPED_TEST(InterpolationVectorTest, Edges2CellsRandom) { Kokkos::deep_copy(this->p_u_out, p_u_out_h); Kokkos::deep_copy(this->p_v_out, p_v_out_h); - Kokkos::fence(); - // Call the function edges2cells_vector_lib<TypeParam>( this->p_vn_in.data(), this->p_vt_in.data(), @@ -315,8 +309,6 @@ TYPED_TEST(InterpolationVectorTest, Edges2CellsRandom) { this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, this->elev, nproma, nlev, nblks_e, nblks_c); - Kokkos::fence(); - // Copy results back to host Kokkos::deep_copy(p_u_out_h, this->p_u_out); Kokkos::deep_copy(p_v_out_h, this->p_v_out); @@ -377,15 +369,16 @@ TYPED_TEST(InterpolationVectorTest, Edges2CellsRandom) { } } - Kokkos::fence(); + TypeParam tol = std::is_same<TypeParam, float>::value ? 
+ static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); // Verify results for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { for (int jk = this->slev; jk <= this->elev; ++jk) { for (int jc = this->i_startidx_in; jc <= this->i_endidx_in; ++jc) { - EXPECT_NEAR(p_u_out_h[out_at(jc, jk, 0)], expected_u(jc, jk, 0), 1e-5) + EXPECT_NEAR(p_u_out_h[out_at(jc, jk, 0)], expected_u(jc, jk, 0), tol) << "u value mismatch at jc=" << jc << ", jk=" << jk; - EXPECT_NEAR(p_v_out_h[out_at(jc, jk, 0)], expected_v(jc, jk, 0), 1e-5) + EXPECT_NEAR(p_v_out_h[out_at(jc, jk, 0)], expected_v(jc, jk, 0), tol) << "v value mismatch at jc=" << jc << ", jk=" << jk; } } diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp index af72e65..49c7dec 100644 --- a/test/c/test_intp_rbf.cpp +++ b/test/c/test_intp_rbf.cpp @@ -372,6 +372,9 @@ TYPED_TEST(RbfInterpolSingleParamTest, C2GradRandom) { } } + TypeParam tol = std::is_same<TypeParam, float>::value ? + static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); + // Verify results for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { int i_startidx, i_endidx; @@ -381,12 +384,10 @@ TYPED_TEST(RbfInterpolSingleParamTest, C2GradRandom) { for (int jk = this->slev; jk <= this->elev; ++jk) { for (int jc = i_startidx; jc <= i_endidx; ++jc) { EXPECT_NEAR(grad_x_h[grad_at(jc, jk, jb)], - expected_grad_x(jc, jk, jb), - static_cast<TypeParam>(1e-5)) + expected_grad_x(jc, jk, jb), tol) << "grad_x failure at block " << jb << ", level " << jk << ", index " << jc; EXPECT_NEAR(grad_y_h[grad_at(jc, jk, jb)], - expected_grad_y(jc, jk, jb), - static_cast<TypeParam>(1e-5)) + expected_grad_y(jc, jk, jb), tol) << "grad_y failure at block " << jb << ", level " << jk << ", index " << jc; } } @@ -638,6 +639,9 @@ TYPED_TEST(RbfInterpolSingleParamTest, CellRandom) { } } + TypeParam tol = std::is_same<TypeParam, float>::value ? 
+ static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); + // Verify results for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { int i_startidx, i_endidx; @@ -647,13 +651,11 @@ TYPED_TEST(RbfInterpolSingleParamTest, CellRandom) { for (int jk = this->slev; jk <= this->elev; ++jk) { for (int jc = i_startidx; jc <= i_endidx; ++jc) { EXPECT_NEAR(p_u_out_h[cell_at(jc, jk, jb)], - expected_u(jc, jk, jb), - static_cast<TypeParam>(1e-5)) + expected_u(jc, jk, jb), tol) << "u failure at block " << jb << ", level " << jk << ", index " << jc; EXPECT_NEAR(p_v_out_h[cell_at(jc, jk, jb)], - expected_v(jc, jk, jb), - static_cast<TypeParam>(1e-5)) + expected_v(jc, jk, jb), tol) << "v failure at block " << jb << ", level " << jk << ", index " << jc; } } @@ -871,6 +873,9 @@ TYPED_TEST(RbfInterpolSingleParamTest, EdgeRandom) { } } + TypeParam tol = std::is_same<TypeParam, float>::value ? + static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); + // Verify results for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { int i_startidx, i_endidx; @@ -880,8 +885,7 @@ TYPED_TEST(RbfInterpolSingleParamTest, EdgeRandom) { for (int jk = this->slev; jk <= this->elev; ++jk) { for (int je = i_startidx; je <= i_endidx; ++je) { EXPECT_NEAR(p_vt_out_h[vt_at(je, jk, jb)], - expected_vt(je, jk, jb), - static_cast<TypeParam>(1e-5)) + expected_vt(je, jk, jb), tol) << "Tangential velocity failure at block " << jb << ", level " << jk << ", index " << je; } } @@ -1169,6 +1173,9 @@ TYPED_TEST(RbfVecInterpolDoubleParamTest, VertexRandom) { } } + OutType tol = std::is_same<OutType, float>::value ? 
+ static_cast<OutType>(1e-5) : static_cast<OutType>(1e-13); + // Verify results for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { int i_startidx, i_endidx; @@ -1178,12 +1185,10 @@ TYPED_TEST(RbfVecInterpolDoubleParamTest, VertexRandom) { for (int jk = this->slev; jk <= this->elev; ++jk) { for (int jv = i_startidx; jv <= i_endidx; ++jv) { EXPECT_NEAR(p_u_out_h[vert_at(jv, jk, jb)], - expected_u(jv, jk, jb), - static_cast<OutType>(1e-5)) + expected_u(jv, jk, jb), tol) << "u failure at block " << jb << ", level " << jk << ", index " << jv; EXPECT_NEAR(p_v_out_h[vert_at(jv, jk, jb)], - expected_v(jv, jk, jb), - static_cast<OutType>(1e-5)) + expected_v(jv, jk, jb), tol) << "v failure at block " << jb << ", level " << jk << ", index " << jv; } } -- GitLab From 81a8a322c5c1fa9a1cfef06a51720da305b3da07 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 14 Apr 2025 14:14:31 +0200 Subject: [PATCH 25/34] made some changes to unify the way loops are defined --- .../mo_lib_interpolation_scalar.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/interpolation/mo_lib_interpolation_scalar.cpp b/src/interpolation/mo_lib_interpolation_scalar.cpp index 51edcda..6b761dc 100644 --- a/src/interpolation/mo_lib_interpolation_scalar.cpp +++ b/src/interpolation/mo_lib_interpolation_scalar.cpp @@ -52,7 +52,7 @@ void verts2edges_scalar_lib(const T *p_vertex_in, const int *edge_vertex_idx, UnmanagedConstT3D coeff_int_view(coeff_int, nproma, 2, nblks_e); UnmanagedT3D p_edge_out_view(p_edge_out, nproma, nlev, nblks_e); - for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, @@ -117,7 +117,7 @@ void cells2edges_scalar_lib(const T *p_cell_in, const int *edge_cell_idx, i_startblk = i_startblk_in[0]; i_endblk = i_endblk_in[0]; - for (int jb = i_startblk; jb < i_endblk + 1; 
++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_e_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, @@ -147,7 +147,7 @@ void cells2edges_scalar_lib(const T *p_cell_in, const int *edge_cell_idx, i_startblk = i_startblk_in[1]; i_endblk = i_endblk_in[1]; - for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_e_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, @@ -204,7 +204,7 @@ void edges2verts_scalar_lib(const T *p_edge_in, const int *vert_edge_idx, UnmanagedConstT3D v_int_view(v_int, nproma, 6, nblks_v); UnmanagedT3D p_vert_out_view(p_vert_out, nproma, nlev, nblks_v); - for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, @@ -277,7 +277,7 @@ void edges2cells_scalar_lib(const T *p_edge_in, const int *edge_idx, int i_startidx, i_endidx; - for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); @@ -345,7 +345,7 @@ void cells2verts_scalar_lib(const T *p_cell_in, const int *vert_cell_idx, int i_startidx, i_endidx; - for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); @@ -426,7 +426,7 @@ void cells2verts_scalar_ri_lib(const T *p_cell_in, const int *vert_cell_idx, int i_startidx, i_endidx; - for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); @@ -568,7 +568,7 @@ void cell_avg_lib(const T *psi_c, const int *cell_neighbor_idx, // block 
indices of triangles next to each cell, dim: (nproma,nblks_c, 3) UnmanagedConstInt3D iblk_view(cell_neighbor_blk, nproma, nblks_c, 3); // cell_neighbour_blk - // averaging coefficients, dim: (nproma,nlev,nblks_c) + // averaging coefficients, dim: (nproma, 4, nblks_c) UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma, 4, nblks_c); // cell based variable after averaging, dim: (nproma,nlev,nblks_c) @@ -576,7 +576,7 @@ void cell_avg_lib(const T *psi_c, const int *cell_neighbor_idx, int i_startidx, i_endidx; - for (int jb = i_startblk; jb < i_endblk + 1; ++jb) { + for (int jb = i_startblk; jb <= i_endblk; ++jb) { get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); -- GitLab From 6eb05ce5500298f21e8b6da6110a15399f41b005 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 14 Apr 2025 14:14:54 +0200 Subject: [PATCH 26/34] removed some redundant lines --- src/horizontal/mo_lib_divrot.cpp | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/src/horizontal/mo_lib_divrot.cpp b/src/horizontal/mo_lib_divrot.cpp index dbbef65..0138cc0 100644 --- a/src/horizontal/mo_lib_divrot.cpp +++ b/src/horizontal/mo_lib_divrot.cpp @@ -255,10 +255,7 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, p_cc_view(jc, jk, jb); z_d[8] = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - p_cc_view(jc, jk, jb); - // }); - // Kokkos::parallel_for( - // "recon_lsq_cell_q_step2", innerPolicy, - // KOKKOS_LAMBDA(const int jk, const int jc) { + z_qt_times_d[0] = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d[0] + lsq_qtmat_c_view(jc, 0, 1, jb) * z_d[1] + lsq_qtmat_c_view(jc, 0, 2, jb) * z_d[2] + @@ -413,10 +410,7 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, p_cc_view(jc, jk, jb); z_b[8] = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - p_cc_view(jc, jk, jb); - // }); - // Kokkos::parallel_for( - // "recon_lsq_cell_q_svd_step2", innerPolicy, - // 
KOKKOS_LAMBDA(const int jk, const int jc) { + p_coeff_view(5, jc, jk, jb) = lsq_pseudoinv_view(jc, 4, 0, jb) * z_b[0] + lsq_pseudoinv_view(jc, 4, 1, jb) * z_b[1] + @@ -558,10 +552,7 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, p_cc_view(jc, jk, jb); z_d[8] = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - p_cc_view(jc, jk, jb); - // }); - // Kokkos::parallel_for( - // "recon_lsq_cell_c_step2", innerPolicy, - // KOKKOS_LAMBDA(const int jk, const int jc) { + z_qt_times_d[0] = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d[0] + lsq_qtmat_c_view(jc, 0, 1, jb) * z_d[1] + lsq_qtmat_c_view(jc, 0, 2, jb) * z_d[2] + @@ -1023,8 +1014,6 @@ void div4d(const int *cell_edge_idx, const int *cell_edge_blk, typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; - typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT3D; typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT4D; typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, -- GitLab From ce1cfd347a0e5da340738d94319a04b12a9e2f42 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 14 Apr 2025 14:15:44 +0200 Subject: [PATCH 27/34] removed some redundant print statements --- test/c/test_interpolation_scalar.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp index c3158ad..c84f4fe 100644 --- a/test/c/test_interpolation_scalar.cpp +++ b/test/c/test_interpolation_scalar.cpp @@ -209,15 +209,6 @@ TYPED_TEST(InterpolationScalarSingleParamTest, Verts2EdgesSpecific) { // Copy results back to host Kokkos::deep_copy(p_edge_out_h, this->p_edge_out); - // print results in one line - std::cout << "p_edge_out_h: "; - for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { - for (int jk = this->slev; jk <= this->elev; ++jk) { - for (int jv = this->i_startidx; jv <= this->i_endidx; ++jv) { - 
std::cout << p_edge_out_h[edge_at(jv, jk, jb)] << " "; - } - } - } std::cout << std::endl; // Expected results based on the specific test values std::vector<TypeParam> expected_edges(12); -- GitLab From 77aa898000cf8d1e2f60b4ddb47c0cf513aed133 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 14 Apr 2025 14:16:06 +0200 Subject: [PATCH 28/34] removed declaration of unused integers --- test/c/test_horizontal_div.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/c/test_horizontal_div.cpp b/test/c/test_horizontal_div.cpp index 06e6edd..9f7f4c0 100644 --- a/test/c/test_horizontal_div.cpp +++ b/test/c/test_horizontal_div.cpp @@ -665,7 +665,6 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgSpecific) { constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; constexpr int nblks_e = this->nblks_e; - constexpr int dim4d = this->dim4d; const auto &vec_e_at = at<nproma, nlev, nblks_e>; const auto &cell_edge_at = at<nproma, nblks_c, 3>; @@ -1029,7 +1028,6 @@ TYPED_TEST(HorizontalDivTest, TestDivAvgSpecificNoL2fields) { constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; constexpr int nblks_e = this->nblks_e; - constexpr int dim4d = this->dim4d; const auto &vec_e_at = at<nproma, nlev, nblks_e>; const auto &cell_edge_at = at<nproma, nblks_c, 3>; -- GitLab From 075c960034a3487a27f68405cd0a7417360790c2 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 14 Apr 2025 14:16:49 +0200 Subject: [PATCH 29/34] reverted back some unnecessary changes --- test/c/test_horizontal_recon.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/c/test_horizontal_recon.cpp b/test/c/test_horizontal_recon.cpp index 0264805..32d084a 100644 --- a/test/c/test_horizontal_recon.cpp +++ b/test/c/test_horizontal_recon.cpp @@ -1050,11 +1050,11 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); // Check 
result - for (int j = 0; j < lsq_dim_unk + 1; ++j) { + for (int i = 0; i < lsq_dim_unk + 1; ++i) { for (int jc = 0; jc < nproma; ++jc) { - EXPECT_NEAR(p_coeff_h[(p_coeff_at(j, jc, 0, 0))], - p_result[(at<lsq_dim_unk + 1, nproma>(j, jc))], tol) - << "For loop result fails for j = " << j << ", jc = " << jc; + EXPECT_NEAR(p_coeff_h[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], tol) + << "For loop result fails for i = " << i << ", jc = " << jc; } } } @@ -1322,11 +1322,11 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellRandom) { static_cast<TypeParam>(1e-5) : static_cast<TypeParam>(1e-13); // Check result - for (int j = 0; j < lsq_dim_unk + 1; ++j) { + for (int i = 0; i < lsq_dim_unk + 1; ++i) { for (int jc = 0; jc < nproma; ++jc) { - EXPECT_NEAR(p_coeff_h[(p_coeff_at(j, jc, 0, 0))], - p_result[(at<lsq_dim_unk + 1, nproma>(j, jc))], tol) - << "For loop result fails for j = " << j << ", jc = " << jc; + EXPECT_NEAR(p_coeff_h[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], tol) + << "For loop result fails for i = " << i << ", jc = " << jc; } } } -- GitLab From 25b45d792a1bd72b5d90547dbb54f244424bf0ca Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 14 Apr 2025 14:17:20 +0200 Subject: [PATCH 30/34] fixed issues with a test routine in test_horizontal_recon --- test/c/test_horizontal_recon.cpp | 103 ++++++++++++++----------------- 1 file changed, 45 insertions(+), 58 deletions(-) diff --git a/test/c/test_horizontal_recon.cpp b/test/c/test_horizontal_recon.cpp index 32d084a..b83886c 100644 --- a/test/c/test_horizontal_recon.cpp +++ b/test/c/test_horizontal_recon.cpp @@ -743,89 +743,75 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { // Copy results back to host Kokkos::deep_copy(p_coeff_h, this->p_coeff); - // Create host views for reference computation - using host_space = Kokkos::HostSpace; - - // Arrays for intermediate calculations - Kokkos::View<TypeParam ***, 
host_space> z_d_h("z_d_h", lsq_dim_c, nproma, - nlev); - Kokkos::View<TypeParam *, host_space> z_qt_times_d_h("z_qt_times_d_h", - lsq_dim_unk); + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); - // Result view - Kokkos::View<TypeParam **, host_space> p_result_h("p_result_h", - lsq_dim_unk + 1, nproma); + for (int i = 0; i < nproma; ++i) { + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + p_result[(at<lsq_dim_unk + 1, nproma>(j, i))] = static_cast<TypeParam>(0.0); + } + } - // calculating only for jb=0 + // doing the calculation only for jb=0 for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, this->i_endblk, i_startidx, i_endidx); - // Step 1: Calculate z_d values (matches the "recon_lsq_cell_q_step1" - // parallel_for) - // calculating only for jk = 0 + // Step 1: Calculate z_d values for (int jk = this->slev; jk < this->elev; ++jk) { for (int jc = i_startidx; jc <= i_endidx; ++jc) { for (int i = 0; i < lsq_dim_c; ++i) { - z_d_h(i, jc, jk) = - p_cc_h[p_cc_at( - cell_neighbor_idx_h[cell_neighbor_at(jc, jb, i)], jk, - cell_neighbor_blk_h[cell_neighbor_at(jc, jb, i)])] - - p_cc_h[p_cc_at(jc, jk, jb)]; + z_d[i] = p_cc_h[p_cc_at( + cell_neighbor_idx_h[cell_neighbor_at(jc, jb, i)], jk, + cell_neighbor_blk_h[cell_neighbor_at(jc, jb, i)])] - + p_cc_h[p_cc_at(jc, jk, jb)]; } - } - } - // Step 2: Calculate coefficients (matches the "recon_lsq_cell_q_step2" - // parallel_for) - // calculating only for jk = 0 - for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc <= i_endidx; ++jc) { // Matrix multiplication (Q^T * d) for (int j = 0; j < lsq_dim_unk; ++j) { - z_qt_times_d_h(j) = 0.0; + z_qt_times_d[j] = 0.0; for (int i = 0; i < lsq_dim_c; ++i) { - z_qt_times_d_h(j) += - 
lsq_qtmat_c_h[qtmat_at(jc, j, i, jb)] * z_d_h(i, jc, jk); + z_qt_times_d[j] += + lsq_qtmat_c_h[qtmat_at(jc, j, i, jb)] * z_d[i]; } } // Back-substitution (mirrors the order in the GPU implementation) - p_result_h(5, jc) = - lsq_rmat_rdiag_c_h[rmat_rdiag_at(jc, 4, jb)] * z_qt_times_d_h(4); + p_result[at<lsq_dim_unk + 1, nproma>(5, jc)] = + lsq_rmat_rdiag_c_h[rmat_rdiag_at(jc, 4, jb)] * z_qt_times_d[4]; - p_result_h(4, jc) = + p_result[at<lsq_dim_unk + 1, nproma>(4, jc)] = lsq_rmat_rdiag_c_h[rmat_rdiag_at(jc, 3, jb)] * - (z_qt_times_d_h(3) - - lsq_rmat_utri_c_h[rmat_utri_at(jc, 0, jb)] * p_result_h(5, jc)); - - p_result_h(3, jc) = + (z_qt_times_d[3] - + lsq_rmat_utri_c_h[rmat_utri_at(jc, 0, jb)] * p_result[at<lsq_dim_unk + 1, nproma>(5, jc)]); + p_result[at<lsq_dim_unk + 1, nproma>(3, jc)] = lsq_rmat_rdiag_c_h[rmat_rdiag_at(jc, 2, jb)] * - (z_qt_times_d_h(2) - - lsq_rmat_utri_c_h[rmat_utri_at(jc, 1, jb)] * p_result_h(4, jc) - - lsq_rmat_utri_c_h[rmat_utri_at(jc, 2, jb)] * p_result_h(5, jc)); - - p_result_h(2, jc) = + (z_qt_times_d[2] - + lsq_rmat_utri_c_h[rmat_utri_at(jc, 1, jb)] * p_result[at<lsq_dim_unk + 1, nproma>(4, jc)] - + lsq_rmat_utri_c_h[rmat_utri_at(jc, 2, jb)] * p_result[at<lsq_dim_unk + 1, nproma>(5, jc)]); + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] = lsq_rmat_rdiag_c_h[rmat_rdiag_at(jc, 1, jb)] * - (z_qt_times_d_h(1) - - lsq_rmat_utri_c_h[rmat_utri_at(jc, 3, jb)] * p_result_h(3, jc) - - lsq_rmat_utri_c_h[rmat_utri_at(jc, 4, jb)] * p_result_h(4, jc) - - lsq_rmat_utri_c_h[rmat_utri_at(jc, 5, jb)] * p_result_h(5, jc)); - - p_result_h(1, jc) = + (z_qt_times_d[1] - + lsq_rmat_utri_c_h[rmat_utri_at(jc, 3, jb)] * p_result[at<lsq_dim_unk + 1, nproma>(3, jc)] - + lsq_rmat_utri_c_h[rmat_utri_at(jc, 4, jb)] * p_result[at<lsq_dim_unk + 1, nproma>(4, jc)] - + lsq_rmat_utri_c_h[rmat_utri_at(jc, 5, jb)] * p_result[at<lsq_dim_unk + 1, nproma>(5, jc)]); + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] = lsq_rmat_rdiag_c_h[rmat_rdiag_at(jc, 0, jb)] * - 
(z_qt_times_d_h(0) - - lsq_rmat_utri_c_h[rmat_utri_at(jc, 6, jb)] * p_result_h(2, jc) - - lsq_rmat_utri_c_h[rmat_utri_at(jc, 7, jb)] * p_result_h(3, jc) - - lsq_rmat_utri_c_h[rmat_utri_at(jc, 8, jb)] * p_result_h(4, jc) - - lsq_rmat_utri_c_h[rmat_utri_at(jc, 9, jb)] * p_result_h(5, jc)); - + (z_qt_times_d[0] - + lsq_rmat_utri_c_h[rmat_utri_at(jc, 6, jb)] * p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] - + lsq_rmat_utri_c_h[rmat_utri_at(jc, 7, jb)] * p_result[at<lsq_dim_unk + 1, nproma>(3, jc)] - + lsq_rmat_utri_c_h[rmat_utri_at(jc, 8, jb)] * p_result[at<lsq_dim_unk + 1, nproma>(4, jc)] - + lsq_rmat_utri_c_h[rmat_utri_at(jc, 9, jb)] * p_result[at<lsq_dim_unk + 1, nproma>(5, jc)]); // Conservation correction - p_result_h(0, jc) = p_cc_h[p_cc_at(jc, jk, jb)]; + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + p_cc_h[p_cc_at(jc, jk, jb)]; for (int j = 0; j < lsq_dim_unk; ++j) { - p_result_h(0, jc) -= - p_result_h(j + 1, jc) * lsq_moments_h[moments_at(jc, jb, j)]; + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= + p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * + lsq_moments_h[moments_at(jc, jb, j)]; } } } @@ -837,7 +823,8 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { // Check result for (int i = 0; i < lsq_dim_unk + 1; ++i) { for (int jc = 0; jc < nproma; ++jc) { - EXPECT_NEAR(p_coeff_h[p_coeff_at(i, jc, 0, 0)], p_result_h(i, jc), tol) + EXPECT_NEAR(p_coeff_h[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], tol) << "For loop result fails for i = " << i << ", jc = " << jc; } } -- GitLab From d499e6926fa483a38f2c2208146396870b3cd975 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 14 Apr 2025 14:23:05 +0200 Subject: [PATCH 31/34] changed the name of a local variable in mo_lib_loopindices --- src/support/mo_lib_loopindices.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/support/mo_lib_loopindices.cpp b/src/support/mo_lib_loopindices.cpp 
index 8c8d318..fcc31b6 100644 --- a/src/support/mo_lib_loopindices.cpp +++ b/src/support/mo_lib_loopindices.cpp @@ -19,19 +19,19 @@ void get_indices_c_lib(const int i_startidx_in, const int i_endidx_in, const int //Since code is ported incrementally from Fortran to C++, depending on where the function is called from //(either fortran or c++), the first index should be either 0 or 1. int first_index; - int nproma_loc; + int i_endidx_loc; if (called_from_cpp){ first_index = 0; - nproma_loc = nproma - 1; + i_endidx_loc = nproma - 1; } else { first_index = 1; - nproma_loc = nproma; + i_endidx_loc = nproma; } if (i_blk == i_startblk) { i_startidx_out = std::max(first_index, i_startidx_in); - i_endidx_out = nproma_loc; + i_endidx_out = i_endidx_loc; if (i_blk == i_endblk) { i_endidx_out = i_endidx_in; } @@ -40,7 +40,7 @@ void get_indices_c_lib(const int i_startidx_in, const int i_endidx_in, const int i_endidx_out = i_endidx_in; } else { i_startidx_out = first_index; - i_endidx_out = nproma_loc; + i_endidx_out = i_endidx_loc; } } @@ -52,18 +52,18 @@ void get_indices_e_lib(const int i_startidx_in, const int i_endidx_in, const int //Since code is ported incrementally from Fortran to C++, depending on where the function is called from, //the first index should be either 0 or 1. int first_index; - int nproma_loc; + int i_endidx_loc; if (called_from_cpp) { first_index = 0; - nproma_loc = nproma - 1; + i_endidx_loc = nproma - 1; } else { first_index = 1; - nproma_loc = nproma; + i_endidx_loc = nproma; } i_startidx_out = (i_blk != i_startblk) ? first_index : std::max(first_index, i_startidx_in); - i_endidx_out = (i_blk != i_endblk) ? nproma_loc : i_endidx_in; + i_endidx_out = (i_blk != i_endblk) ? 
i_endidx_loc : i_endidx_in; } // get_indices_v_lib function @@ -74,19 +74,19 @@ void get_indices_v_lib(const int i_startidx_in, const int i_endidx_in, const int //Since code is ported incrementally from Fortran to C++, depending on where the function is called from, //the first index should be either 0 or 1. int first_index; - int nproma_loc; + int i_endidx_loc; if (called_from_cpp) { first_index = 0; - nproma_loc = nproma - 1; + i_endidx_loc = nproma - 1; } else { first_index = 1; - nproma_loc = nproma; + i_endidx_loc = nproma; } if (i_blk == i_startblk) { i_startidx_out = i_startidx_in; - i_endidx_out = nproma_loc; + i_endidx_out = i_endidx_loc; if (i_blk == i_endblk) { i_endidx_out = i_endidx_in; } @@ -95,6 +95,6 @@ void get_indices_v_lib(const int i_startidx_in, const int i_endidx_in, const int i_endidx_out = i_endidx_in; } else { i_startidx_out = first_index; - i_endidx_out = nproma_loc; + i_endidx_out = i_endidx_loc; } } -- GitLab From 57f9fd4482196a0ea5c8f7599f20633f2940f068 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 14 Apr 2025 14:56:35 +0200 Subject: [PATCH 32/34] made the way reference result is assigned --- test/c/test_interpolation_scalar.cpp | 84 ++++++---------------------- test/c/test_intp_rbf.cpp | 63 +++++---------------- 2 files changed, 30 insertions(+), 117 deletions(-) diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp index c84f4fe..20ccf4f 100644 --- a/test/c/test_interpolation_scalar.cpp +++ b/test/c/test_interpolation_scalar.cpp @@ -209,17 +209,11 @@ TYPED_TEST(InterpolationScalarSingleParamTest, Verts2EdgesSpecific) { // Copy results back to host Kokkos::deep_copy(p_edge_out_h, this->p_edge_out); - std::cout << std::endl; // Expected results based on the specific test values - std::vector<TypeParam> expected_edges(12); - int idx = 0; - std::generate(expected_edges.begin(), expected_edges.end(), [&idx]() { - TypeParam values[] = { + std::vector<TypeParam> expected_edges 
= { 1.505, 1.015, 1.605, 1.116, 1.705, 1.217, 1.525, 1.0251, 1.626, 1.1271, 1.727, 1.2291 - }; - return values[idx++]; - }); + }; // Verify results for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { @@ -421,15 +415,10 @@ TYPED_TEST(InterpolationScalarSingleParamTest, Edges2VertsSpecific) { Kokkos::deep_copy(p_vert_out_h, this->p_vert_out); // Expected results based on the specific test values - std::vector<TypeParam> expected_verts(12); - int idx = 0; - std::generate(expected_verts.begin(), expected_verts.end(), [&idx]() { - TypeParam values[] = { + std::vector<TypeParam> expected_verts = { 1.7459, 1.7159, 1.8609, 1.8309, 1.9759, 1.9459, 1.7456, 1.7156, 1.8606, 1.8306, 1.9756, 1.9456 - }; - return values[idx++]; - }); + }; // Verify results for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { @@ -627,15 +616,10 @@ TYPED_TEST(InterpolationScalarSingleParamTest, Edges2CellsSpecific) { Kokkos::deep_copy(p_cell_out_h, this->p_cell_out); // Expected results based on the specific test values - std::vector<TypeParam> expected_cells(12); - int idx = 0; - std::generate(expected_cells.begin(), expected_cells.end(), [&idx]() { - TypeParam values[] = { + std::vector<TypeParam> expected_cells = { 1.37677, 1.7201, 1.47977, 1.8231, 1.58277, 1.9261, 1.3802, 1.72353, 1.4832, 1.82653, 1.5862, 1.92953 - }; - return values[idx++]; - }); + }; // Verify results for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { @@ -833,15 +817,10 @@ TYPED_TEST(InterpolationScalarSingleParamTest, Verts2CellsSpecific) { Kokkos::deep_copy(p_cell_out_h, this->p_cell_out); // Expected results based on the specific test values - std::vector<TypeParam> expected_cells(12); - int idx = 0; - std::generate(expected_cells.begin(), expected_cells.end(), [&idx]() { - TypeParam values[] = { + std::vector<TypeParam> expected_cells = { 1.37677, 1.7201, 1.47977, 1.8231, 1.58277, 1.9261, 1.3802, 1.72353, 1.4832, 1.82653, 1.5862, 1.92953 - }; - return values[idx++]; - }); + }; // Verify 
results - check the same ranges as in the expected calculation for (int jb = 0; jb < nblks_c; ++jb) { @@ -1047,15 +1026,10 @@ TYPED_TEST(InterpolationScalarSingleParamTest, CellAvgLibSpecific) { Kokkos::deep_copy(avg_psi_c_h, this->avg_psi_c); // Expected results based on the specific test values - std::vector<TypeParam> expected_avg(12); - int idx = 0; - std::generate(expected_avg.begin(), expected_avg.end(), [&idx]() { - TypeParam values[] = { + std::vector<TypeParam> expected_avg = { 1.402, 1.602, 1.502, 1.702, 1.602, 1.802, 1.408, 1.608, 1.508, 1.708, 1.608, 1.808 - }; - return values[idx++]; - }); + }; // Verify results for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { @@ -1324,15 +1298,10 @@ TYPED_TEST(InterpolationScalarDoubleParamTest, Cells2EdgesSpecific) { int i_endidx_range = this->i_endidx_in[1]; // Expected results based on the specific test values - std::vector<OutType> expected_edges(12); - int idx = 0; - std::generate(expected_edges.begin(), expected_edges.end(), [&idx]() { - OutType values[] = { + std::vector<OutType> expected_edges = { 1.505, 1.5149, 1.605, 1.6149, 1.705, 1.7149, 1.505, 1.5151, 1.605, 1.6151, 1.705, 1.7151 - }; - return values[idx++]; - }); + }; // Verify results for (int jb = i_startblk; jb <= i_endblk; ++jb) { @@ -1560,15 +1529,10 @@ TYPED_TEST(InterpolationScalarDoubleParamTest, Cells2VertsSpecific) { Kokkos::deep_copy(p_vert_out_h, this->p_vert_out); // Expected results based on the specific test values - std::vector<OutType> expected_verts(12); - int idx = 0; - std::generate(expected_verts.begin(), expected_verts.end(), [&idx]() { - OutType values[] = { + std::vector<OutType> expected_verts = { 1.7459, 1.7159, 1.8609, 1.8309, 1.9759, 1.9459, 1.7456, 1.7156, 1.8606, 1.8306, 1.9756, 1.9456 - }; - return values[idx++]; - }); + }; // Verify results for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { @@ -1833,10 +1797,7 @@ TYPED_TEST(Cells2vertsriScalarLibTestFixture, Cells2VertsRISpecific) { 
Kokkos::deep_copy(p_vert_out_h, this->p_vert_out); // Expected results based on the specific test values - std::vector<OutType> expected_verts(12); - int idx = 0; - std::generate(expected_verts.begin(), expected_verts.end(), [&idx]() { - OutType values[] = { + std::vector<OutType> expected_verts = { #ifdef __LOOP_EXCHANGE 1.7459, 1.7159, 1.8609, 1.8309, 1.9759, 1.9459, 1.7456, 1.7156, 1.8606, 1.8306, 1.9756, 1.9456 @@ -1844,20 +1805,7 @@ TYPED_TEST(Cells2vertsriScalarLibTestFixture, Cells2VertsRISpecific) { 1.7459, 1.8609, 1.9759, 1.7159, 1.8309, 1.9459, 1.7456, 1.8606, 1.9756, 1.7156, 1.8306, 1.9456 #endif - }; - return values[idx++]; - }); - - std::cout << "p_vert_out_h: " << std::endl; - // print out the array p_vert_out_h in one line - for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { - for (int jk = this->slev; jk <= this->elev; ++jk) { - for (int jv = this->i_startidx; jv <= this->i_endidx; ++jv) { - // std::cout << p_vert_out_h[vert_at(jk, jv, jb)] << ", "; - std::cout << p_vert_out_h[vert_at(jv, jk, jb)] << ", "; - } - } - } + }; // Verify results - using the appropriate indexing depending on __LOOP_EXCHANGE for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp index 49c7dec..a67480d 100644 --- a/test/c/test_intp_rbf.cpp +++ b/test/c/test_intp_rbf.cpp @@ -204,29 +204,19 @@ TYPED_TEST(RbfInterpolSingleParamTest, C2GradSpecific) { Kokkos::deep_copy(grad_y_h, this->grad_y); // Expected results based on the specific test values - std::vector<TypeParam> expected_grad_x(24); - std::vector<TypeParam> expected_grad_y(24); - int idx = 0; - std::generate(expected_grad_x.begin(), expected_grad_x.end(), [&idx]() { - TypeParam values[] = { + std::vector<TypeParam> expected_grad_x = { 19.9225, 22.9275, 26.2225, 20.9675, 24.0725, 27.4675, 22.0125, 25.2175, 28.7125, 23.0575, 26.3625, 29.9575, 38.972, 42.977, 47.272, 41.017, 45.122, 49.517, 43.062, 47.267, 51.762, 45.107, 49.412, 54.007 - 
}; - return values[idx++]; - }); + }; - idx = 0; - std::generate(expected_grad_y.begin(), expected_grad_y.end(), [&idx]() { - TypeParam values[] = { + std::vector<TypeParam> expected_grad_y = { 38.9725, 42.9775, 47.2725, 41.0175, 45.1225, 49.5175, 43.0625, 47.2675, 51.7625, 45.1075, 49.4125, 54.0075, 58.022, 63.027, 68.322, 61.067, 66.172, 71.567, 64.112, 69.317, 74.812, 67.157, 72.462, 78.057 - }; - return values[idx++]; - }); + }; // Verify results for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { @@ -480,28 +470,18 @@ TYPED_TEST(RbfInterpolSingleParamTest, CellSpecific) { Kokkos::deep_copy(p_v_out_h, this->p_v_out); // Expected results based on the specific test values - std::vector<TypeParam> expected_u(24); - std::vector<TypeParam> expected_v(24); - int idx = 0; - std::generate(expected_u.begin(), expected_u.end(), [&idx]() { - TypeParam values[] = { + std::vector<TypeParam> expected_u = { 18.8216, 20.5356, 22.3396, 19.7576, 21.5616, 23.4556, 20.6936, 22.5876, 24.5716, 21.6296, 23.6136, 25.6876, 36.882, 38.597, 40.402, 38.718, 40.523, 42.418, 40.554, 42.449, 44.434, 42.39, 44.375, 46.45 - }; - return values[idx++]; - }); - idx = 0; - std::generate(expected_v.begin(), expected_v.end(), [&idx]() { - TypeParam values[] = { + }; + std::vector<TypeParam> expected_v = { 36.8616, 38.5756, 40.3796, 38.6976, 40.5016, 42.3956, 40.5336, 42.4276, 44.4116, 42.3696, 44.3536, 46.4276, 54.932, 56.647, 58.452, 57.668, 59.473, 61.368, 60.404, 62.299, 64.284, 63.14, 65.125, 67.2 - }; - return values[idx++]; - }); + }; // Verify results for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { @@ -741,17 +721,12 @@ TYPED_TEST(RbfInterpolSingleParamTest, EdgeSpecific) { Kokkos::deep_copy(p_vt_out_h, this->p_vt_out); // Expected results based on the specific test values - std::vector<TypeParam> expected_vt(24); - int idx = 0; - std::generate(expected_vt.begin(), expected_vt.end(), [&idx]() { - TypeParam values[] = { + std::vector<TypeParam> expected_vt = { 7.1304, 
8.9324, 10.9644, 7.5364, 9.3784, 11.4504, 7.9424, 9.8244, 11.9364, 8.3484, 10.2704, 12.4224, 14.1502, 16.9522, 19.9842, 14.9562, 17.7982, 20.8702, 15.7622, 18.6442, 21.7562, 16.5682, 19.4902, 22.6422, - }; - return values[idx++]; - }); + }; // Verify results for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { @@ -1012,28 +987,18 @@ TYPED_TEST(RbfVecInterpolDoubleParamTest, VertexSpecific) { Kokkos::deep_copy(p_v_out_h, this->p_v_out); // Expected results based on the specific test values - std::vector<OutType> expected_u(24); - std::vector<OutType> expected_v(24); - int idx = 0; - std::generate(expected_u.begin(), expected_u.end(), [&idx]() { - OutType values[] = { + std::vector<OutType> expected_u = { 12.3709, 13.5139, 14.7169, 12.9859, 14.1889, 15.4519, 13.6009, 14.8639, 16.1869, 14.2159, 15.5389, 16.9219, 24.4006, 25.5436, 26.7466, 25.6156, 26.8186, 28.0816, 26.8306, 28.0936, 29.4166, 28.0456, 29.3686, 30.7516 - }; - return values[idx++]; - }); - idx = 0; - std::generate(expected_v.begin(), expected_v.end(), [&idx]() { - OutType values[] = { + }; + std::vector<OutType> expected_v = { 24.4009, 25.5439, 26.7469, 25.6159, 26.8189, 28.0819, 26.8309, 28.0939, 29.4169, 28.0459, 29.3689, 30.7519, 36.4306, 37.5736, 38.7766, 38.2456, 39.4486, 40.7116, 40.0606, 41.3236, 42.6466, 41.8756, 43.1986, 44.5816 - }; - return values[idx++]; - }); + }; // Verify results for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) { -- GitLab From 8e85077c588fe690ec7c25256cccbf5525acb465 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 14 Apr 2025 15:04:37 +0200 Subject: [PATCH 33/34] made a small change --- test/c/test_horizontal_div.cpp | 12 ++++-------- test/c/test_horizontal_rot.cpp | 12 ++++-------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/test/c/test_horizontal_div.cpp b/test/c/test_horizontal_div.cpp index 9f7f4c0..ef95f6b 100644 --- a/test/c/test_horizontal_div.cpp +++ b/test/c/test_horizontal_div.cpp @@ -58,7 +58,9 
@@ protected: Kokkos::View<ValueType *, memory_space> opt_out2; HorizontalDivTest() - : vec_e("vec_e", dim_combine(nproma, nlev, nblks_e)), + : slev(dim4d, 0), + elev(dim4d, nlev - 1), // Full vertical range (0 .. nlev-1) + vec_e("vec_e", dim_combine(nproma, nlev, nblks_e)), cell_edge_idx("cell_edge_idx", dim_combine(nproma, nblks_c, 3)), cell_edge_blk("cell_edge_blk", dim_combine(nproma, nblks_c, 3)), geofac_div("geofac_div", dim_combine(nproma, 3, nblks_c)), @@ -69,13 +71,7 @@ protected: cell_neighbor_blk("cell_neighbor_blk", dim_combine(nproma, nblks_c, 3)), avg_coeff("avg_coeff", dim_combine(nproma, 4, nblks_c)), opt_in2("opt_in2", dim_combine(nproma, nlev, nblks_e)), - opt_out2("opt_out2", dim_combine(nproma, nlev, nblks_c)) { - - // We keep slev and elev as std::vector since they are small and used only - // on the host. - slev.resize(dim4d, 0); - elev.resize(dim4d, nlev - 1); // Full vertical range (0 .. nlev-1) - } + opt_out2("opt_out2", dim_combine(nproma, nlev, nblks_c)) {} }; /// ValueTypes which the divrot tests should run with diff --git a/test/c/test_horizontal_rot.cpp b/test/c/test_horizontal_rot.cpp index f93cddd..ca675a4 100644 --- a/test/c/test_horizontal_rot.cpp +++ b/test/c/test_horizontal_rot.cpp @@ -51,19 +51,15 @@ protected: Kokkos::View<ValueType *, memory_space> f4dout; HorizontalRotVertexTest() - : vec_e("vec_e", dim_combine(nproma, nlev, nblks_e)), + : slev(dim4d, 0), + elev(dim4d, nlev - 1), // Full vertical range (0 .. nlev-1) + vec_e("vec_e", dim_combine(nproma, nlev, nblks_e)), vert_edge_idx("vert_edge_idx", dim_combine(nproma, nblks_v, 6)), vert_edge_blk("vert_edge_blk", dim_combine(nproma, nblks_v, 6)), geofac_rot("geofac_rot", dim_combine(nproma, 6, nblks_v)), rot_vec("rot_vec", dim_combine(nproma, nlev, nblks_v)), f4din("f4din", dim_combine(nproma, nlev, nblks_e, dim4d)), - f4dout("f4dout", dim_combine(nproma, nlev, nblks_v, dim4d)) { - - // We keep slev and elev as std::vector since they are small and used only - // on the host. 
- slev.resize(dim4d, 0); - elev.resize(dim4d, nlev - 1); // Full vertical range (0 .. nlev-1) - } + f4dout("f4dout", dim_combine(nproma, nlev, nblks_v, dim4d)) {} }; /// ValueTypes which the divrot tests should run with -- GitLab From a32290547ad2f0d4cad1b19710f6a3e3acba5a50 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 15 Apr 2025 19:09:06 +0200 Subject: [PATCH 34/34] changed the time for gitlab-ci --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6877a94..5109bb5 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -17,7 +17,7 @@ stages: variables: ACCOUNT_CPU: "ka1125" ACCOUNT_GPU: "bk1341" - SLURM_OPTIONS_CPU: "--account=$ACCOUNT_CPU --partition=shared" + SLURM_OPTIONS_CPU: "--account=$ACCOUNT_CPU --partition=shared --time=00:10:00" SLURM_OPTIONS_GPU: "--account=$ACCOUNT_GPU --partition=gpu --gpus=1" SLURM_NTASKS: "--ntasks=1" GIT_CONFIG_COUNT: 1 -- GitLab