From 5083d1d7be9f5cf95f45d4cc5e8724eb3bed2d93 Mon Sep 17 00:00:00 2001 From: Harshada Balasubramanian <harshada.balasubramanian@mpimet.mpg.de> Date: Fri, 28 Feb 2025 12:34:52 +0100 Subject: [PATCH 1/5] add compile definition --- CMakeLists.txt | 1 + main.cpp | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0903a9e..8dca0f4 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,6 +41,7 @@ target_link_libraries(demo PUBLIC Kokkos::kokkos) if ("${MU_ARCH}" STREQUAL "a100") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -acc -Minfo=accel -gpu=cc80") + add_compile_definitions(DEMO_DEVICE) target_compile_options(demo PUBLIC "-gpu=pinned") target_link_options(demo PUBLIC "-gpu=pinned") endif() diff --git a/main.cpp b/main.cpp index d0b53ac..ee7d5ae 100644 --- a/main.cpp +++ b/main.cpp @@ -463,7 +463,7 @@ int main() { scenario_1(array, nblocks, nlev, nproma); - #if defined(gpu) + #if defined(DEMO_DEVICE) #pragma acc enter data copyin(array[0:nblocks*nlev*nproma]) { #pragma acc host_data use_device(array) @@ -488,7 +488,7 @@ int main() { scenario_7(array, nblocks, nlev, nproma); */ - #if defined(gpu) + #if defined(DEMO_DEVICE) #pragma acc enter data copyin(array[0:nblocks*nlev*nproma]) { #pragma acc host_data use_device(array) -- GitLab From 5480e7ab0298192136a36280a63abb1188f49c49 Mon Sep 17 00:00:00 2001 From: Harshada Balasubramanian <harshada.balasubramanian@mpimet.mpg.de> Date: Fri, 28 Feb 2025 12:56:47 +0100 Subject: [PATCH 2/5] function for openacc calls --- main.cpp | 94 +++++++++++++++++++++----------------------------------- 1 file changed, 35 insertions(+), 59 deletions(-) diff --git a/main.cpp b/main.cpp index ee7d5ae..8d83e16 100644 --- a/main.cpp +++ b/main.cpp @@ -59,9 +59,8 @@ void scenario_1(double* array, int nblocks, int nlev, int nproma, bool print = t validate(array, nblocks, nlev, nproma); } -void scenario_1b(double* array, int nblocks, int nlev, int nproma, bool print = true) { - if (print) - std::cout << "scenario 1b: Default layout right; view(array, nblocks, nlev, nproma); d_view(jb, jk, jc) ----- " +void scenario_1b(double* array, int nblocks, int nlev, int nproma) { + std::cout << "scenario 1b: Default layout right; view(array, nblocks, nlev, nproma); d_view(jb, jk, jc) ----- " << std::endl; Kokkos::View<double***, Kokkos::LayoutRight, Kokkos::MemoryUnmanaged> d_view(array, nblocks, nlev, nproma); @@ -79,7 +78,7 @@ void scenario_1b(double* array, int nblocks, int nlev, int nproma, bool print = }); Kokkos::fence(); - if (print) printf("Time = %f ms\n\n", timer.seconds() * 1000); + printf("Time = %f ms\n\n", timer.seconds() * 1000); } @@ -361,10 +360,9 @@ void scenario_7(double* array, int nblocks, int nlev, int nproma, bool print=tru validate(array, nblocks, nlev, nproma); } -void scenario_7b(double* array, int nblocks, int nlev, int nproma, bool print=true) { +void scenario_7b(double* array, int nblocks, int nlev, int nproma) { - if(print) - std::cout << "scenario 7b: +ACC Default layout; view(array, nblocks, nlev, nproma); d_view(jb, jk, jc) ----- " << std::endl; + std::cout << "scenario 7b: +ACC Default layout; view(array, nblocks, nlev, nproma); d_view(jb, jk, jc) ----- " << std::endl; Kokkos::View<double***, Kokkos::MemoryUnmanaged> d_view(array, nblocks, nlev, nproma); @@ -388,14 +386,11 @@ void scenario_7b(double* array, int nblocks, int nlev, int nproma, bool print=tr }); Kokkos::fence(); - - if(print) - printf("Time = %f ms\n\n", timer.seconds() * 1000); + printf("Time = %f ms\n\n", timer.seconds() * 1000); } -void scenario_8(double* array, int nblocks, int nlev, int nproma, bool print=true) { - if(print) - std::cout << "scenario 8: Default Layout as ICON pointer view(array, nproma, nlev, nblocks); d_view(jb, jk, jc) ----- " << std::endl; +void scenario_8(double* array, int nblocks, int nlev, int nproma) { + std::cout << "scenario 8: Default Layout as ICON pointer view(array, nproma, nlev, nblocks); d_view(jb, jk, jc) ----- " << std::endl; using space_t = Kokkos::DefaultExecutionSpace::memory_space; Kokkos::View<double***, Kokkos::MemoryUnmanaged> d_view(array, nproma, nlev, nblocks); @@ -421,11 +416,28 @@ void scenario_8(double* array, int nblocks, int nlev, int nproma, bool print=tru Kokkos::fence(); - if(print) - printf("Time = %f ms\n\n", timer.seconds() * 1000); + printf("Time = %f ms\n\n", timer.seconds() * 1000); } +void openacc_calls(double* array, int nblocks, int nlev, int nproma, + std::function<void(double*, int, int, int)> func){ + #if defined(DEMO_DEVICE) + #pragma acc enter data copyin(array[0:nblocks*nlev*nproma]) + { + #pragma acc host_data use_device(array) + { + func(array, nblocks, nlev, nproma); + } + } + #pragma acc exit data copyout(array[0:nblocks*nlev*nproma]) + #else + scenario_1b(array, nblocks, nlev, nproma); + #endif + validate(array, nblocks, nlev, nproma); + +} + int main() { int ncells = atoi(std::getenv("NCELLS")); int nlev = atoi(std::getenv("NLEV")); @@ -461,22 +473,14 @@ int main() { { scenario_1(array, nblocks, nlev, nproma, false); - scenario_1(array, nblocks, nlev, nproma); - - #if defined(DEMO_DEVICE) - #pragma acc enter data copyin(array[0:nblocks*nlev*nproma]) - { - #pragma acc host_data use_device(array) - { - scenario_1b(array, nblocks, nlev, nproma); - } - } - #pragma acc exit data copyout(array[0:nblocks*nlev*nproma]) - #else - scenario_1b(array, nblocks, nlev, nproma); - #endif - validate(array, nblocks, nlev, nproma); - + //scenario_1(array, nblocks, nlev, nproma); + + std::function<void(double*, int, int, int)> s_1b = scenario_1b; + openacc_calls(array, nblocks, nlev, nproma, s_1b); + std::function<void(double*, int, int, int)> s_7b = scenario_7b; + openacc_calls(array, nblocks, nlev, nproma, s_7b); + std::function<void(double*, int, int, int)> s_8 = scenario_8; + openacc_calls(array, nblocks, nlev, nproma, s_8); /* scenario_2(array, nblocks, nlev, nproma); scenario_2b(array, nblocks, nlev, nproma); @@ -488,34 +492,6 @@ int main() { scenario_7(array, nblocks, nlev, nproma); */ - #if defined(DEMO_DEVICE) - #pragma acc enter data copyin(array[0:nblocks*nlev*nproma]) - { - #pragma acc host_data use_device(array) - { - scenario_7b(array, nblocks, nlev, nproma); - } - } - #pragma acc exit data copyout(array[0:nblocks*nlev*nproma]) - #else - scenario_7b(array, nblocks, nlev, nproma); - #endif - validate(array, nblocks, nlev, nproma); - - #if defined(gpu) - #pragma acc enter data copyin(array[0:nblocks*nlev*nproma]) - { - #pragma acc host_data use_device(array) - { - scenario_8(array, nblocks, nlev, nproma); - } - } - #pragma acc exit data copyout(array[0:nblocks*nlev*nproma]) - #else - scenario_8(array, nblocks, nlev, nproma); - #endif - validate(array, nblocks, nlev, nproma); - } Kokkos::finalize(); -- GitLab From 2c43d45771ef39670556e9fd5e5946bf6ab24cc7 Mon Sep 17 00:00:00 2001 From: Harshada Balasubramanian <harshada.balasubramanian@mpimet.mpg.de> Date: Fri, 28 Feb 2025 13:27:36 +0100 Subject: [PATCH 3/5] fix inconsistencies --- main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.cpp b/main.cpp index 8d83e16..4eae412 100644 --- a/main.cpp +++ b/main.cpp @@ -432,7 +432,7 @@ void openacc_calls(double* array, int nblocks, int nlev, int nproma, } #pragma acc exit data copyout(array[0:nblocks*nlev*nproma]) #else - scenario_1b(array, nblocks, nlev, nproma); + func(array, nblocks, nlev, nproma); #endif validate(array, nblocks, nlev, nproma); -- GitLab From cb50e6c561bf92b22b9f8c334bc7f5a3dbfe9df4 Mon Sep 17 00:00:00 2001 From: Harshada Balasubramanian <harshada.balasubramanian@mpimet.mpg.de> Date: Fri, 28 Feb 2025 13:42:32 +0100 Subject: [PATCH 4/5] add DEMO_DEVICE everywhere --- main.cpp | 64 +++++++------------------------------------------------- 1 file changed, 8 insertions(+), 56 deletions(-) diff --git a/main.cpp b/main.cpp index 4eae412..3e94887 100644 --- a/main.cpp +++ b/main.cpp @@ -234,12 +234,10 @@ void scenario_4b(double* array, int nblocks, int nlev, int nproma, bool print = validate(array, nblocks, nlev, nproma); } -#if defined(KOKKOS_ENABLE_CUDA) or defined(KOKKOS_ENABLE_HIP) -#define gpu 1 -using Layout = Kokkos::LayoutLeft; +#if defined(DEMO_DEVICE) + using Layout = Kokkos::LayoutLeft; #else -#undef gpu -using Layout = Kokkos::LayoutRight; + using Layout = Kokkos::LayoutRight; #endif template <class T> struct LoopFunctor { @@ -254,7 +252,7 @@ template <class T> struct LoopFunctor { KOKKOS_INLINE_FUNCTION void operator()(const int jc) const { for (int jk = 0; jk < nlev; ++jk) { -#if defined(gpu) +#if defined(DEMO_DEVICE) int p = jb * nlev * nproma + jk * nproma + jc; #else int p = jc * nlev * nblocks + jk * nblocks + jb; @@ -311,7 +309,7 @@ void scenario_6(double* array, int nblocks, int nlev, int nproma, bool print = t Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, nproma_tmp), [&](const int jc) { // sequential over the levels for (int jk = 0; jk < nlev; ++jk) { -#if defined(gpu) +#if defined(DEMO_DEVICE) int p = jb * nlev * nproma + jk * nproma + jc; #else int p = jc * nlev * nblocks + jk * nblocks + jb; @@ -372,7 +370,7 @@ void scenario_7b(double* array, int nblocks, int nlev, int nproma) { for (int jc = 0 ; jc < nproma; ++jc) { for (int jk = 0; jk < nlev; ++jk) { -#if defined(gpu) +#if defined(DEMO_DEVICE) int p = jc * nlev * nblocks + jk * nblocks + jb; #else int p = jb * nlev * nproma + jk * nproma + jc; @@ -403,7 +401,7 @@ void scenario_8(double* array, int nblocks, int nlev, int nproma) { for (int jk = 0; jk < nlev; ++jk) { -#if defined(gpu) +#if defined(DEMO_DEVICE) int p = jb * nlev * nproma + jk * nproma + jc; #else int p = jc * nlev * nblocks + jk * nblocks + jb; @@ -447,28 +445,6 @@ int main() { std::cout << "nblocks=" << nblocks << ", nlev=" << nlev << ", nproma=" << nproma << std::endl; double array[nblocks * nlev * nproma]; - /* - for (int jb = 0; jb < nblocks; ++jb) - for (int jk = 0; jk < nlev; ++jk) - for (int jc = 0; jc < nproma; ++jc) { - int p = jb * nlev * nproma + jk * nproma + jc; - array[p] = 1; //static_cast<double>(p); - } - */ - /* - for (int i = 0; i < nblocks * nlev * nproma; ++i) - std::cout << array[i] << " " ; - std::cout << "\n"; - - - for (int jb = 0; jb < nblocks; ++jb) - for (int jk = 0; jk < nlev; ++jk){ - for (int jc = 0; jc < nproma; ++jc) - std::cout << view(jb, jk, jc)<< " "; - std::cout << "\n"; - } - */ - Kokkos::initialize(); { scenario_1(array, nblocks, nlev, nproma, false); @@ -496,28 +472,4 @@ int main() { Kokkos::finalize(); return 0; -} - -/** - * - * #if 0 - Kokkos::parallel_for( - "print", md_range_policy({0, 0, 0}, {nblocks, nlev, nproma}), - KOKKOS_LAMBDA(const int jb, const int jk, const int jc) { - int p = jb * nlev * nproma + jk * nproma + jc; - d_view(jb, jk, jc) += p; - printf("%f ", d_view(jb, jk, jc)); - }); - std::cout << "\n"; -#endif - for (int jb = 0 ; jb < nblocks; ++jb) - Kokkos::parallel_for("", Kokkos::RangePolicy<>(0, nproma), KOKKOS_LAMBDA (const int jc) { - for (int jk = 0; jk < nlev; ++jk) { - // int p = jb * nlev * nproma + jk * nproma + jc; left - int p = jc * nlev * nblocks + jk * nblocks + jb; - // d_view(jb, jk, jc) = p; - d_view(jc, jk, jb) = p; - - // printf("%f ", d_view(jb, jk, jc)); - }}); -*/ +} \ No newline at end of file -- GitLab From f073afac01b9461bce68f7718a86c2c44de9e34e Mon Sep 17 00:00:00 2001 From: Harshada Balasubramanian <harshada.balasubramanian@mpimet.mpg.de> Date: Fri, 28 Feb 2025 13:48:08 +0100 Subject: [PATCH 5/5] memset to reinitialise the array --- main.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/main.cpp b/main.cpp index 3e94887..f869a4b 100644 --- a/main.cpp +++ b/main.cpp @@ -450,11 +450,16 @@ int main() { scenario_1(array, nblocks, nlev, nproma, false); //scenario_1(array, nblocks, nlev, nproma); - + + memset(array, 0.0, sizeof(array)); std::function<void(double*, int, int, int)> s_1b = scenario_1b; openacc_calls(array, nblocks, nlev, nproma, s_1b); + + memset(array, 0.0, sizeof(array)); std::function<void(double*, int, int, int)> s_7b = scenario_7b; openacc_calls(array, nblocks, nlev, nproma, s_7b); + + memset(array, 0.0, sizeof(array)); std::function<void(double*, int, int, int)> s_8 = scenario_8; openacc_calls(array, nblocks, nlev, nproma, s_8); /* -- GitLab