From 5083d1d7be9f5cf95f45d4cc5e8724eb3bed2d93 Mon Sep 17 00:00:00 2001
From: Harshada Balasubramanian <harshada.balasubramanian@mpimet.mpg.de>
Date: Fri, 28 Feb 2025 12:34:52 +0100
Subject: [PATCH 1/5] add compile definition

---
 CMakeLists.txt | 1 +
 main.cpp       | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0903a9e..8dca0f4 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -41,6 +41,7 @@ target_link_libraries(demo PUBLIC Kokkos::kokkos)
 
 if ("${MU_ARCH}" STREQUAL "a100")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -acc -Minfo=accel -gpu=cc80")
+    add_compile_definitions(DEMO_DEVICE)
     target_compile_options(demo PUBLIC "-gpu=pinned")
     target_link_options(demo PUBLIC "-gpu=pinned")
 endif()
diff --git a/main.cpp b/main.cpp
index d0b53ac..ee7d5ae 100644
--- a/main.cpp
+++ b/main.cpp
@@ -463,7 +463,7 @@ int main() {
 
     scenario_1(array, nblocks, nlev, nproma);
     
-    #if defined(gpu)
+    #if defined(DEMO_DEVICE)
     #pragma acc enter data copyin(array[0:nblocks*nlev*nproma]) 
     {
       #pragma acc host_data use_device(array) 
@@ -488,7 +488,7 @@ int main() {
     scenario_7(array, nblocks, nlev, nproma);
 */
 
-    #if defined(gpu)
+    #if defined(DEMO_DEVICE)
     #pragma acc enter data copyin(array[0:nblocks*nlev*nproma]) 
     {
       #pragma acc host_data use_device(array) 
-- 
GitLab


From 5480e7ab0298192136a36280a63abb1188f49c49 Mon Sep 17 00:00:00 2001
From: Harshada Balasubramanian <harshada.balasubramanian@mpimet.mpg.de>
Date: Fri, 28 Feb 2025 12:56:47 +0100
Subject: [PATCH 2/5] function for openacc calls

---
 main.cpp | 94 +++++++++++++++++++++-----------------------------------
 1 file changed, 35 insertions(+), 59 deletions(-)

diff --git a/main.cpp b/main.cpp
index ee7d5ae..8d83e16 100644
--- a/main.cpp
+++ b/main.cpp
@@ -59,9 +59,8 @@ void scenario_1(double* array, int nblocks, int nlev, int nproma, bool print = t
   validate(array, nblocks, nlev, nproma);
 }
 
-void scenario_1b(double* array, int nblocks, int nlev, int nproma, bool print = true) {
-  if (print)
-    std::cout << "scenario 1b: Default layout right; view(array, nblocks, nlev, nproma); d_view(jb, jk, jc) ----- "
+void scenario_1b(double* array, int nblocks, int nlev, int nproma) {
+  std::cout << "scenario 1b: Default layout right; view(array, nblocks, nlev, nproma); d_view(jb, jk, jc) ----- "
               << std::endl;
 
   Kokkos::View<double***, Kokkos::LayoutRight, Kokkos::MemoryUnmanaged> d_view(array, nblocks, nlev, nproma);
@@ -79,7 +78,7 @@ void scenario_1b(double* array, int nblocks, int nlev, int nproma, bool print =
         });
   Kokkos::fence();
 
-  if (print) printf("Time = %f ms\n\n", timer.seconds() * 1000);
+  printf("Time = %f ms\n\n", timer.seconds() * 1000);
 
 }
 
@@ -361,10 +360,9 @@ void scenario_7(double* array, int nblocks, int nlev, int nproma, bool print=tru
     validate(array, nblocks, nlev, nproma);
 }
 
-void scenario_7b(double* array, int nblocks, int nlev, int nproma, bool print=true) {
+void scenario_7b(double* array, int nblocks, int nlev, int nproma) {
 
-    if(print)
-        std::cout << "scenario 7b: +ACC Default layout; view(array, nblocks, nlev, nproma); d_view(jb, jk, jc) ----- " << std::endl;
+    std::cout << "scenario 7b: +ACC Default layout; view(array, nblocks, nlev, nproma); d_view(jb, jk, jc) ----- " << std::endl;
 
     Kokkos::View<double***, Kokkos::MemoryUnmanaged> d_view(array, nblocks, nlev, nproma);
 
@@ -388,14 +386,11 @@ void scenario_7b(double* array, int nblocks, int nlev, int nproma, bool print=tr
       });
 
     Kokkos::fence();
-
-    if(print)
-        printf("Time = %f ms\n\n", timer.seconds() * 1000);
+    printf("Time = %f ms\n\n", timer.seconds() * 1000);
 }
 
-void scenario_8(double* array, int nblocks, int nlev, int nproma, bool print=true) {
-   if(print)
-        std::cout << "scenario 8: Default Layout as ICON pointer view(array, nproma, nlev, nblocks); d_view(jb, jk, jc) ----- " << std::endl;
+void scenario_8(double* array, int nblocks, int nlev, int nproma) {
+    std::cout << "scenario 8: Default Layout as ICON pointer view(array, nproma, nlev, nblocks); d_view(jb, jk, jc) ----- " << std::endl;
 
     using space_t = Kokkos::DefaultExecutionSpace::memory_space;
     Kokkos::View<double***, Kokkos::MemoryUnmanaged> d_view(array, nproma, nlev, nblocks);
@@ -421,11 +416,28 @@ void scenario_8(double* array, int nblocks, int nlev, int nproma, bool print=tru
         
     Kokkos::fence();
 
-    if(print)
-        printf("Time = %f ms\n\n", timer.seconds() * 1000);
+    printf("Time = %f ms\n\n", timer.seconds() * 1000);
 
 }
 
+void openacc_calls(double* array, int nblocks, int nlev, int nproma, 
+                      std::function<void(double*, int, int, int)> func){
+  #if defined(DEMO_DEVICE)
+    #pragma acc enter data copyin(array[0:nblocks*nlev*nproma]) 
+    {
+      #pragma acc host_data use_device(array) 
+      {
+        func(array, nblocks, nlev, nproma);
+      }
+    }
+    #pragma acc exit data copyout(array[0:nblocks*nlev*nproma])
+  #else
+      scenario_1b(array, nblocks, nlev, nproma);
+  #endif
+    validate(array, nblocks, nlev, nproma);
+   
+}
+
 int main() {
   int ncells  = atoi(std::getenv("NCELLS"));
   int nlev    = atoi(std::getenv("NLEV"));
@@ -461,22 +473,14 @@ int main() {
   {
     scenario_1(array, nblocks, nlev, nproma, false);
 
-    scenario_1(array, nblocks, nlev, nproma);
-    
-    #if defined(DEMO_DEVICE)
-    #pragma acc enter data copyin(array[0:nblocks*nlev*nproma]) 
-    {
-      #pragma acc host_data use_device(array) 
-      {
-        scenario_1b(array, nblocks, nlev, nproma);
-      }
-    }
-    #pragma acc exit data copyout(array[0:nblocks*nlev*nproma])
-    #else
-      scenario_1b(array, nblocks, nlev, nproma);
-    #endif
-    validate(array, nblocks, nlev, nproma);
-   
+    //scenario_1(array, nblocks, nlev, nproma);
+
+    std::function<void(double*, int, int, int)> s_1b = scenario_1b;
+    openacc_calls(array, nblocks, nlev, nproma, s_1b);
+    std::function<void(double*, int, int, int)> s_7b = scenario_7b;
+    openacc_calls(array, nblocks, nlev, nproma, s_7b);
+    std::function<void(double*, int, int, int)> s_8 = scenario_8;
+    openacc_calls(array, nblocks, nlev, nproma, s_8);
 /*
     scenario_2(array, nblocks, nlev, nproma);
     scenario_2b(array, nblocks, nlev, nproma);
@@ -488,34 +492,6 @@ int main() {
     scenario_7(array, nblocks, nlev, nproma);
 */
 
-    #if defined(DEMO_DEVICE)
-    #pragma acc enter data copyin(array[0:nblocks*nlev*nproma]) 
-    {
-      #pragma acc host_data use_device(array) 
-      {
-        scenario_7b(array, nblocks, nlev, nproma);
-      }
-    }
-    #pragma acc exit data copyout(array[0:nblocks*nlev*nproma])
-    #else
-      scenario_7b(array, nblocks, nlev, nproma);
-    #endif
-    validate(array, nblocks, nlev, nproma);
-
-    #if defined(gpu)
-    #pragma acc enter data copyin(array[0:nblocks*nlev*nproma]) 
-    {
-      #pragma acc host_data use_device(array) 
-      {
-        scenario_8(array, nblocks, nlev, nproma);
-      }
-    }
-    #pragma acc exit data copyout(array[0:nblocks*nlev*nproma])
-    #else
-      scenario_8(array, nblocks, nlev, nproma);
-    #endif
-    validate(array, nblocks, nlev, nproma);
-
   }
   Kokkos::finalize();
 
-- 
GitLab


From 2c43d45771ef39670556e9fd5e5946bf6ab24cc7 Mon Sep 17 00:00:00 2001
From: Harshada Balasubramanian <harshada.balasubramanian@mpimet.mpg.de>
Date: Fri, 28 Feb 2025 13:27:36 +0100
Subject: [PATCH 3/5] fix inconsistencies

---
 main.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.cpp b/main.cpp
index 8d83e16..4eae412 100644
--- a/main.cpp
+++ b/main.cpp
@@ -432,7 +432,7 @@ void openacc_calls(double* array, int nblocks, int nlev, int nproma,
     }
     #pragma acc exit data copyout(array[0:nblocks*nlev*nproma])
   #else
-      scenario_1b(array, nblocks, nlev, nproma);
+      func(array, nblocks, nlev, nproma);
   #endif
     validate(array, nblocks, nlev, nproma);
    
-- 
GitLab


From cb50e6c561bf92b22b9f8c334bc7f5a3dbfe9df4 Mon Sep 17 00:00:00 2001
From: Harshada Balasubramanian <harshada.balasubramanian@mpimet.mpg.de>
Date: Fri, 28 Feb 2025 13:42:32 +0100
Subject: [PATCH 4/5] add DEMO_DEVICE everywhere

---
 main.cpp | 64 +++++++-------------------------------------------------
 1 file changed, 8 insertions(+), 56 deletions(-)

diff --git a/main.cpp b/main.cpp
index 4eae412..3e94887 100644
--- a/main.cpp
+++ b/main.cpp
@@ -234,12 +234,10 @@ void scenario_4b(double* array, int nblocks, int nlev, int nproma, bool print =
   validate(array, nblocks, nlev, nproma);
 }
 
-#if defined(KOKKOS_ENABLE_CUDA) or defined(KOKKOS_ENABLE_HIP)
-#define gpu 1
-using Layout = Kokkos::LayoutLeft;
+#if defined(DEMO_DEVICE)
+  using Layout = Kokkos::LayoutLeft;
 #else
-#undef gpu
-using Layout = Kokkos::LayoutRight;
+  using Layout = Kokkos::LayoutRight;
 #endif
 
 template <class T> struct LoopFunctor {
@@ -254,7 +252,7 @@ template <class T> struct LoopFunctor {
   KOKKOS_INLINE_FUNCTION
   void operator()(const int jc) const {
     for (int jk = 0; jk < nlev; ++jk) {
-#if defined(gpu)
+#if defined(DEMO_DEVICE)
       int p = jb * nlev * nproma + jk * nproma + jc;
 #else
       int p = jc * nlev * nblocks + jk * nblocks + jb;
@@ -311,7 +309,7 @@ void scenario_6(double* array, int nblocks, int nlev, int nproma, bool print = t
         Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, nproma_tmp), [&](const int jc) {
           // sequential over the levels
           for (int jk = 0; jk < nlev; ++jk) {
-#if defined(gpu)
+#if defined(DEMO_DEVICE)
             int p = jb * nlev * nproma + jk * nproma + jc;
 #else 
                             int p = jc * nlev * nblocks + jk * nblocks + jb;
@@ -372,7 +370,7 @@ void scenario_7b(double* array, int nblocks, int nlev, int nproma) {
       for (int jc = 0 ; jc < nproma; ++jc) {
         for (int jk = 0; jk < nlev; ++jk) {
             
-#if defined(gpu)
+#if defined(DEMO_DEVICE)
             int p = jc * nlev * nblocks + jk * nblocks + jb;
 #else 
             int p = jb * nlev * nproma + jk * nproma + jc;
@@ -403,7 +401,7 @@ void scenario_8(double* array, int nblocks, int nlev, int nproma) {
           for (int jk = 0; jk < nlev; ++jk) {
 
 
-#if defined(gpu)
+#if defined(DEMO_DEVICE)
             int p = jb * nlev * nproma + jk * nproma + jc;
 #else 
             int p = jc * nlev * nblocks + jk * nblocks + jb;
@@ -447,28 +445,6 @@ int main() {
   std::cout << "nblocks=" << nblocks << ", nlev=" << nlev << ", nproma=" << nproma << std::endl;
 
   double array[nblocks * nlev * nproma];
-  /*
-     for (int jb = 0; jb < nblocks; ++jb)
-         for (int jk = 0; jk < nlev; ++jk)
-             for (int jc = 0; jc < nproma; ++jc) {
-                 int p = jb * nlev * nproma + jk * nproma + jc;
-                 array[p] = 1; //static_cast<double>(p);
-             }
- */
-  /*
-      for (int i = 0; i < nblocks * nlev * nproma; ++i)
-          std::cout << array[i] << " " ;
-      std::cout << "\n";
-
-
-     for (int jb = 0; jb < nblocks; ++jb)
-          for (int jk = 0; jk < nlev; ++jk){
-              for (int jc = 0; jc < nproma; ++jc)
-                  std::cout << view(jb, jk, jc)<< " ";
-            std::cout << "\n";
-          }
-  */
-
   Kokkos::initialize();
   {
     scenario_1(array, nblocks, nlev, nproma, false);
@@ -496,28 +472,4 @@ int main() {
   Kokkos::finalize();
 
   return 0;
-}
-
-/**
- *
- * #if 0
-    Kokkos::parallel_for(
-      "print", md_range_policy({0, 0, 0}, {nblocks, nlev, nproma}),
-      KOKKOS_LAMBDA(const int jb, const int jk, const int jc) {
-            int p = jb * nlev * nproma + jk * nproma + jc;
-            d_view(jb, jk, jc) += p;
-            printf("%f ", d_view(jb, jk, jc));
-    });
-    std::cout << "\n";
-#endif
-    for (int jb = 0 ; jb < nblocks; ++jb)
-    Kokkos::parallel_for("", Kokkos::RangePolicy<>(0, nproma), KOKKOS_LAMBDA (const int jc) {
-        for (int jk = 0; jk < nlev; ++jk) {
-          //  int p = jb * nlev * nproma + jk * nproma + jc;  left
-            int p = jc * nlev * nblocks + jk * nblocks + jb;
-           // d_view(jb, jk, jc) = p;
-            d_view(jc, jk, jb) = p;
-
-          //  printf("%f ", d_view(jb, jk, jc));
-        }});
-*/
+}
\ No newline at end of file
-- 
GitLab


From f073afac01b9461bce68f7718a86c2c44de9e34e Mon Sep 17 00:00:00 2001
From: Harshada Balasubramanian <harshada.balasubramanian@mpimet.mpg.de>
Date: Fri, 28 Feb 2025 13:48:08 +0100
Subject: [PATCH 5/5] memset to reinitialise the array

---
 main.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/main.cpp b/main.cpp
index 3e94887..f869a4b 100644
--- a/main.cpp
+++ b/main.cpp
@@ -450,11 +450,16 @@ int main() {
     scenario_1(array, nblocks, nlev, nproma, false);
 
     //scenario_1(array, nblocks, nlev, nproma);
-
+    
+    memset(array, 0.0, sizeof(array));
     std::function<void(double*, int, int, int)> s_1b = scenario_1b;
     openacc_calls(array, nblocks, nlev, nproma, s_1b);
+    
+    memset(array, 0.0, sizeof(array));
     std::function<void(double*, int, int, int)> s_7b = scenario_7b;
     openacc_calls(array, nblocks, nlev, nproma, s_7b);
+
+    memset(array, 0.0, sizeof(array));
     std::function<void(double*, int, int, int)> s_8 = scenario_8;
     openacc_calls(array, nblocks, nlev, nproma, s_8);
 /*
-- 
GitLab