From 57074d1ee4916ef4d821eb7f4c04d5a4538be33b Mon Sep 17 00:00:00 2001
From: Dylan Kierans <kierans@dkrz.de>
Date: Fri, 28 Feb 2025 18:27:18 +0100
Subject: [PATCH 01/10] copy_2d_dpsp routine

---
 src/mo_fortran_tools.F90 | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/src/mo_fortran_tools.F90 b/src/mo_fortran_tools.F90
index 204ff1f..8a8640a 100644
--- a/src/mo_fortran_tools.F90
+++ b/src/mo_fortran_tools.F90
@@ -165,6 +165,7 @@ MODULE mo_fortran_tools
     MODULE PROCEDURE copy_3d_sp
     MODULE PROCEDURE copy_4d_sp
     MODULE PROCEDURE copy_5d_sp
+    MODULE PROCEDURE copy_2d_dpsp
     MODULE PROCEDURE copy_3d_dpsp
     MODULE PROCEDURE copy_2d_spdp
     MODULE PROCEDURE copy_3d_spdp
@@ -848,6 +849,36 @@ CONTAINS
     CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE copy_5d_sp
 
+  !> copy state, omp parallel, does not wait for other threads to complete
+  SUBROUTINE copy_2d_dpsp(src, dest, lacc, opt_acc_async)
+    REAL(dp), INTENT(IN) :: src(:, :)
+    REAL(sp), INTENT(OUT) :: dest(:, :)
+    LOGICAL, INTENT(IN) :: lacc
+    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER :: i1, i2, m1, m2
+    LOGICAL :: lzacc
+
+    CALL set_acc_host_or_device(lzacc, lacc)
+
+    m1 = SIZE(dest, 1)
+    m2 = SIZE(dest, 2)
+
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(3) IF(lzacc)
+#if (defined(__INTEL_COMPILER))
+!$omp do private(i1,i2)
+#else
+!$omp do collapse(2)
+#endif
+    DO i2 = 1, m2
+      DO i1 = 1, m1
+        dest(i1, i2) = REAL(src(i1, i2), KIND=sp)
+      END DO
+    END DO
+!$omp end do nowait
+    CALL acc_wait_if_requested(1, opt_acc_async)
+
+  END SUBROUTINE copy_2d_dpsp
+
   !> copy state, omp parallel, does not wait for other threads to complete
   SUBROUTINE copy_3d_dpsp(src, dest, lacc, opt_acc_async)
     REAL(dp), INTENT(IN) :: src(:, :, :)
-- 
GitLab


From 39e57fdc9f947002f722e24c4d1436cf934f9198 Mon Sep 17 00:00:00 2001
From: Dylan Kierans <kierans@dkrz.de>
Date: Fri, 28 Feb 2025 18:40:26 +0100
Subject: [PATCH 02/10] fix copy_2d_dpsp

---
 src/mo_fortran_tools.F90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mo_fortran_tools.F90 b/src/mo_fortran_tools.F90
index 8a8640a..64dcb8a 100644
--- a/src/mo_fortran_tools.F90
+++ b/src/mo_fortran_tools.F90
@@ -863,7 +863,7 @@ CONTAINS
     m1 = SIZE(dest, 1)
     m2 = SIZE(dest, 2)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(3) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(2) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2)
 #else
-- 
GitLab


From 72a30d01e520abdab1ce47b907935dfb050b4beb Mon Sep 17 00:00:00 2001
From: Dylan Kierans <kierans@dkrz.de>
Date: Fri, 28 Feb 2025 18:40:40 +0100
Subject: [PATCH 03/10] test for copy_{2d,3d}_dpsp

---
 test/fortran/test_fortran_tools.f90 | 32 +++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/test/fortran/test_fortran_tools.f90 b/test/fortran/test_fortran_tools.f90
index 36012a2..ed42a2b 100644
--- a/test/fortran/test_fortran_tools.f90
+++ b/test/fortran/test_fortran_tools.f90
@@ -414,6 +414,34 @@ CONTAINS
     CALL ASSERT_EQUAL(assert_real_sp_5d_array(src, dest), .TRUE.)
   END SUBROUTINE
 
+  SUBROUTINE Test_copy_2d_dpsp
+    REAL(dp) :: src(10, 8) = 1.0
+    REAL(sp) :: dest(10, 8)
+
+    CALL TAG_TEST("Test_copy_2d_dpsp_ones")
+    CALL copy(src, dest, .FALSE.)
+    CALL ASSERT_EQUAL(assert_real_spdp_2d_array(dest, src), .TRUE.)
+
+    CALL RANDOM_NUMBER(src)
+    CALL TAG_TEST("Test_copy_2d_dpsp_random")
+    CALL copy(src, dest, .FALSE.)
+    CALL ASSERT_EQUAL(assert_real_spdp_2d_array(dest, src), .TRUE.)
+  END SUBROUTINE
+
+  SUBROUTINE Test_copy_3d_dpsp
+    REAL(dp) :: src(10, 8, 6) = 1.0
+    REAL(sp) :: dest(10, 8, 6)
+
+    CALL TAG_TEST("Test_copy_3d_dpsp_ones")
+    CALL copy(src, dest, .FALSE.)
+    CALL ASSERT_EQUAL(assert_real_spdp_3d_array(dest, src), .TRUE.)
+
+    CALL RANDOM_NUMBER(src)
+    CALL TAG_TEST("Test_copy_3d_dpsp_random")
+    CALL copy(src, dest, .FALSE.)
+    CALL ASSERT_EQUAL(assert_real_spdp_3d_array(dest, src), .TRUE.)
+  END SUBROUTINE
+
   SUBROUTINE Test_copy_2d_spdp
     REAL(sp) :: src(10, 10) = 1.0
     REAL(dp) :: dest(10, 10)
@@ -1845,7 +1873,7 @@ CONTAINS
     assert_real_spdp_2d_array = .TRUE.
     DO i = 1, SIZE(array1, 1)
       DO j = 1, SIZE(array1, 2)
-        IF (array1(i, j) /= array2(i, j)) THEN
+        IF (array1(i, j) /= REAL(array2(i, j),KIND=sp)) THEN
           assert_real_spdp_2d_array = .FALSE.
           EXIT
         END IF
@@ -1862,7 +1890,7 @@ CONTAINS
     DO i = 1, SIZE(array1, 1)
       DO j = 1, SIZE(array1, 2)
         DO k = 1, SIZE(array1, 3)
-          IF (array1(i, j, k) /= array2(i, j, k)) THEN
+          IF (array1(i, j, k) /= REAL(array2(i, j, k),KIND=sp)) THEN
             assert_real_spdp_3d_array = .FALSE.
             EXIT
           END IF
-- 
GitLab


From ce22bd387336aa855a5e547bb8a8cedc300f7fcb Mon Sep 17 00:00:00 2001
From: Dylan Kierans <kierans@dkrz.de>
Date: Fri, 28 Feb 2025 18:46:42 +0100
Subject: [PATCH 04/10] make format

---
 test/fortran/test_fortran_tools.f90 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/fortran/test_fortran_tools.f90 b/test/fortran/test_fortran_tools.f90
index ed42a2b..a3c9a56 100644
--- a/test/fortran/test_fortran_tools.f90
+++ b/test/fortran/test_fortran_tools.f90
@@ -1873,7 +1873,7 @@ CONTAINS
     assert_real_spdp_2d_array = .TRUE.
     DO i = 1, SIZE(array1, 1)
       DO j = 1, SIZE(array1, 2)
-        IF (array1(i, j) /= REAL(array2(i, j),KIND=sp)) THEN
+        IF (array1(i, j) /= REAL(array2(i, j), KIND=sp)) THEN
           assert_real_spdp_2d_array = .FALSE.
           EXIT
         END IF
@@ -1890,7 +1890,7 @@ CONTAINS
     DO i = 1, SIZE(array1, 1)
       DO j = 1, SIZE(array1, 2)
         DO k = 1, SIZE(array1, 3)
-          IF (array1(i, j, k) /= REAL(array2(i, j, k),KIND=sp)) THEN
+          IF (array1(i, j, k) /= REAL(array2(i, j, k), KIND=sp)) THEN
             assert_real_spdp_3d_array = .FALSE.
             EXIT
           END IF
-- 
GitLab


From 41d06856b11ad9eda1de5821aa111d3c5f11fcb3 Mon Sep 17 00:00:00 2001
From: Dylan Kierans <kierans@dkrz.de>
Date: Fri, 28 Feb 2025 19:18:06 +0100
Subject: [PATCH 05/10] manual reset of mo_fortran_tools from master

---
 src/mo_fortran_tools.F90 | 70 ++--------------------------------------
 1 file changed, 3 insertions(+), 67 deletions(-)

diff --git a/src/mo_fortran_tools.F90 b/src/mo_fortran_tools.F90
index 64dcb8a..9a00092 100644
--- a/src/mo_fortran_tools.F90
+++ b/src/mo_fortran_tools.F90
@@ -165,7 +165,6 @@ MODULE mo_fortran_tools
     MODULE PROCEDURE copy_3d_sp
     MODULE PROCEDURE copy_4d_sp
     MODULE PROCEDURE copy_5d_sp
-    MODULE PROCEDURE copy_2d_dpsp
     MODULE PROCEDURE copy_3d_dpsp
     MODULE PROCEDURE copy_2d_spdp
     MODULE PROCEDURE copy_3d_spdp
@@ -864,7 +863,7 @@ CONTAINS
     m2 = SIZE(dest, 2)
 
     !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(2) IF(lzacc)
-#if (defined(__INTEL_COMPILER))
+#ifdef __INTEL_COMPILER
 !$omp do private(i1,i2)
 #else
 !$omp do collapse(2)
@@ -877,7 +876,8 @@ CONTAINS
 !$omp end do nowait
     CALL acc_wait_if_requested(1, opt_acc_async)
 
-  END SUBROUTINE copy_2d_dpsp
+  END SUBROUTINE copy_3d_dpsp
+
 
   !> copy state, omp parallel, does not wait for other threads to complete
   SUBROUTINE copy_3d_dpsp(src, dest, lacc, opt_acc_async)
@@ -2261,22 +2261,6 @@ CONTAINS
       base_shape(2) = in_shape(2)
       CALL insert_dimension_r_dp_3_2_s(ptr_out, ptr_in(1, 1), &
                                        base_shape, new_dim_rank)
-      IF (in_stride(1) > 1 .OR. in_stride(2) > in_shape(1) &
-          .OR. base_shape(1) /= in_shape(1)) THEN
-        out_stride(1) = in_stride(1)
-        out_stride(2) = 1
-        out_shape(1:out_rank - 1) = in_shape
-        DO i = out_rank, new_dim_rank + 1, -1
-          out_shape(i) = out_shape(i - 1)
-          out_stride(i) = out_stride(i - 1)
-        END DO
-        out_stride(new_dim_rank) = 1
-        out_shape(new_dim_rank) = 1
-        out_shape = (out_shape - 1)*out_stride + 1
-        ptr_out => ptr_out(:out_shape(1):out_stride(1), &
-             &             :out_shape(2):out_stride(2), &
-             &             :out_shape(3):out_stride(3))
-      END IF
     ELSE
       out_shape(1:out_rank - 1) = SHAPE(ptr_in)
       DO i = out_rank, new_dim_rank + 1, -1
@@ -2338,22 +2322,6 @@ CONTAINS
       base_shape(2) = in_shape(2)
       CALL insert_dimension_r_sp_3_2_s(ptr_out, ptr_in(1, 1), &
                                        base_shape, new_dim_rank)
-      IF (in_stride(1) > 1 .OR. in_stride(2) > in_shape(1) &
-          .OR. base_shape(1) /= in_shape(1)) THEN
-        out_stride(1) = in_stride(1)
-        out_stride(2) = 1
-        out_shape(1:out_rank - 1) = in_shape
-        DO i = out_rank, new_dim_rank + 1, -1
-          out_shape(i) = out_shape(i - 1)
-          out_stride(i) = out_stride(i - 1)
-        END DO
-        out_stride(new_dim_rank) = 1
-        out_shape(new_dim_rank) = 1
-        out_shape = (out_shape - 1)*out_stride + 1
-        ptr_out => ptr_out(:out_shape(1):out_stride(1), &
-             &             :out_shape(2):out_stride(2), &
-             &             :out_shape(3):out_stride(3))
-      END IF
     ELSE
       out_shape(1:out_rank - 1) = SHAPE(ptr_in)
       DO i = out_rank, new_dim_rank + 1, -1
@@ -2415,22 +2383,6 @@ CONTAINS
       base_shape(2) = in_shape(2)
       CALL insert_dimension_i4_3_2_s(ptr_out, ptr_in(1, 1), &
                                      base_shape, new_dim_rank)
-      IF (in_stride(1) > 1 .OR. in_stride(2) > in_shape(1) &
-          .OR. base_shape(1) /= in_shape(1)) THEN
-        out_stride(1) = in_stride(1)
-        out_stride(2) = 1
-        out_shape(1:out_rank - 1) = in_shape
-        DO i = out_rank, new_dim_rank + 1, -1
-          out_shape(i) = out_shape(i - 1)
-          out_stride(i) = out_stride(i - 1)
-        END DO
-        out_stride(new_dim_rank) = 1
-        out_shape(new_dim_rank) = 1
-        out_shape = (out_shape - 1)*out_stride + 1
-        ptr_out => ptr_out(:out_shape(1):out_stride(1), &
-             &             :out_shape(2):out_stride(2), &
-             &             :out_shape(3):out_stride(3))
-      END IF
     ELSE
       out_shape(1:out_rank - 1) = SHAPE(ptr_in)
       DO i = out_rank, new_dim_rank + 1, -1
@@ -2490,22 +2442,6 @@ CONTAINS
       base_shape(2) = in_shape(2)
       CALL insert_dimension_l_3_2_s(ptr_out, ptr_in(1, 1), &
                                     base_shape, new_dim_rank)
-      IF (in_stride(1) > 1 .OR. in_stride(2) > in_shape(1) &
-          .OR. base_shape(1) /= in_shape(1)) THEN
-        out_stride(1) = in_stride(1)
-        out_stride(2) = 1
-        out_shape(1:out_rank - 1) = in_shape
-        DO i = out_rank, new_dim_rank + 1, -1
-          out_shape(i) = out_shape(i - 1)
-          out_stride(i) = out_stride(i - 1)
-        END DO
-        out_stride(new_dim_rank) = 1
-        out_shape(new_dim_rank) = 1
-        out_shape = (out_shape - 1)*out_stride + 1
-        ptr_out => ptr_out(:out_shape(1):out_stride(1), &
-             &             :out_shape(2):out_stride(2), &
-             &             :out_shape(3):out_stride(3))
-      END IF
     ELSE
       out_shape(1:out_rank - 1) = SHAPE(ptr_in)
       DO i = out_rank, new_dim_rank + 1, -1
-- 
GitLab


From 10c076f30f001fd41e8234ee71fd437ad7555733 Mon Sep 17 00:00:00 2001
From: Dylan Kierans <kierans@dkrz.de>
Date: Fri, 28 Feb 2025 19:21:36 +0100
Subject: [PATCH 06/10] fix interface

---
 src/mo_fortran_tools.F90 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mo_fortran_tools.F90 b/src/mo_fortran_tools.F90
index 9a00092..83a7fa3 100644
--- a/src/mo_fortran_tools.F90
+++ b/src/mo_fortran_tools.F90
@@ -165,6 +165,7 @@ MODULE mo_fortran_tools
     MODULE PROCEDURE copy_3d_sp
     MODULE PROCEDURE copy_4d_sp
     MODULE PROCEDURE copy_5d_sp
+    MODULE PROCEDURE copy_2d_dpsp
     MODULE PROCEDURE copy_3d_dpsp
     MODULE PROCEDURE copy_2d_spdp
     MODULE PROCEDURE copy_3d_spdp
@@ -876,8 +877,7 @@ CONTAINS
 !$omp end do nowait
     CALL acc_wait_if_requested(1, opt_acc_async)
 
-  END SUBROUTINE copy_3d_dpsp
-
+  END SUBROUTINE copy_2d_dpsp
 
   !> copy state, omp parallel, does not wait for other threads to complete
   SUBROUTINE copy_3d_dpsp(src, dest, lacc, opt_acc_async)
-- 
GitLab


From 985075e0539b3ab1a8308ddd0506758f92a9fd24 Mon Sep 17 00:00:00 2001
From: Yen-Chen Chen <yen-chen.chen@tum.de>
Date: Tue, 4 Mar 2025 08:53:14 +0100
Subject: [PATCH 07/10] Fix rebase

---
 src/mo_fortran_tools.F90 | 64 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/src/mo_fortran_tools.F90 b/src/mo_fortran_tools.F90
index 83a7fa3..ef7d42f 100644
--- a/src/mo_fortran_tools.F90
+++ b/src/mo_fortran_tools.F90
@@ -2261,6 +2261,22 @@ CONTAINS
       base_shape(2) = in_shape(2)
       CALL insert_dimension_r_dp_3_2_s(ptr_out, ptr_in(1, 1), &
                                        base_shape, new_dim_rank)
+      IF (in_stride(1) > 1 .OR. in_stride(2) > in_shape(1) &
+          .OR. base_shape(1) /= in_shape(1)) THEN
+        out_stride(1) = in_stride(1)
+        out_stride(2) = 1
+        out_shape(1:out_rank - 1) = in_shape
+        DO i = out_rank, new_dim_rank + 1, -1
+          out_shape(i) = out_shape(i - 1)
+          out_stride(i) = out_stride(i - 1)
+        END DO
+        out_stride(new_dim_rank) = 1
+        out_shape(new_dim_rank) = 1
+        out_shape = (out_shape - 1)*out_stride + 1
+        ptr_out => ptr_out(:out_shape(1):out_stride(1), &
+             &             :out_shape(2):out_stride(2), &
+             &             :out_shape(3):out_stride(3))
+      END IF
     ELSE
       out_shape(1:out_rank - 1) = SHAPE(ptr_in)
       DO i = out_rank, new_dim_rank + 1, -1
@@ -2322,6 +2338,22 @@ CONTAINS
       base_shape(2) = in_shape(2)
       CALL insert_dimension_r_sp_3_2_s(ptr_out, ptr_in(1, 1), &
                                        base_shape, new_dim_rank)
+      IF (in_stride(1) > 1 .OR. in_stride(2) > in_shape(1) &
+          .OR. base_shape(1) /= in_shape(1)) THEN
+        out_stride(1) = in_stride(1)
+        out_stride(2) = 1
+        out_shape(1:out_rank - 1) = in_shape
+        DO i = out_rank, new_dim_rank + 1, -1
+          out_shape(i) = out_shape(i - 1)
+          out_stride(i) = out_stride(i - 1)
+        END DO
+        out_stride(new_dim_rank) = 1
+        out_shape(new_dim_rank) = 1
+        out_shape = (out_shape - 1)*out_stride + 1
+        ptr_out => ptr_out(:out_shape(1):out_stride(1), &
+             &             :out_shape(2):out_stride(2), &
+             &             :out_shape(3):out_stride(3))
+      END IF
     ELSE
       out_shape(1:out_rank - 1) = SHAPE(ptr_in)
       DO i = out_rank, new_dim_rank + 1, -1
@@ -2383,6 +2415,22 @@ CONTAINS
       base_shape(2) = in_shape(2)
       CALL insert_dimension_i4_3_2_s(ptr_out, ptr_in(1, 1), &
                                      base_shape, new_dim_rank)
+      IF (in_stride(1) > 1 .OR. in_stride(2) > in_shape(1) &
+          .OR. base_shape(1) /= in_shape(1)) THEN
+        out_stride(1) = in_stride(1)
+        out_stride(2) = 1
+        out_shape(1:out_rank - 1) = in_shape
+        DO i = out_rank, new_dim_rank + 1, -1
+          out_shape(i) = out_shape(i - 1)
+          out_stride(i) = out_stride(i - 1)
+        END DO
+        out_stride(new_dim_rank) = 1
+        out_shape(new_dim_rank) = 1
+        out_shape = (out_shape - 1)*out_stride + 1
+        ptr_out => ptr_out(:out_shape(1):out_stride(1), &
+             &             :out_shape(2):out_stride(2), &
+             &             :out_shape(3):out_stride(3))
+      END IF
     ELSE
       out_shape(1:out_rank - 1) = SHAPE(ptr_in)
       DO i = out_rank, new_dim_rank + 1, -1
@@ -2442,6 +2490,22 @@ CONTAINS
       base_shape(2) = in_shape(2)
       CALL insert_dimension_l_3_2_s(ptr_out, ptr_in(1, 1), &
                                     base_shape, new_dim_rank)
+      IF (in_stride(1) > 1 .OR. in_stride(2) > in_shape(1) &
+          .OR. base_shape(1) /= in_shape(1)) THEN
+        out_stride(1) = in_stride(1)
+        out_stride(2) = 1
+        out_shape(1:out_rank - 1) = in_shape
+        DO i = out_rank, new_dim_rank + 1, -1
+          out_shape(i) = out_shape(i - 1)
+          out_stride(i) = out_stride(i - 1)
+        END DO
+        out_stride(new_dim_rank) = 1
+        out_shape(new_dim_rank) = 1
+        out_shape = (out_shape - 1)*out_stride + 1
+        ptr_out => ptr_out(:out_shape(1):out_stride(1), &
+             &             :out_shape(2):out_stride(2), &
+             &             :out_shape(3):out_stride(3))
+      END IF
     ELSE
       out_shape(1:out_rank - 1) = SHAPE(ptr_in)
       DO i = out_rank, new_dim_rank + 1, -1
-- 
GitLab


From 90fe08eb26a26424c664904eec4d6fbe756bc8a8 Mon Sep 17 00:00:00 2001
From: Dylan Kierans <kierans@dkrz.de>
Date: Tue, 4 Mar 2025 16:49:11 +0100
Subject: [PATCH 08/10] Consistent ifdefs

---
 src/mo_fortran_tools.F90 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mo_fortran_tools.F90 b/src/mo_fortran_tools.F90
index ef7d42f..eefa54b 100644
--- a/src/mo_fortran_tools.F90
+++ b/src/mo_fortran_tools.F90
@@ -564,7 +564,7 @@ CONTAINS
     m2 = SIZE(dest, 2)
 
     !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(2) IF(lzacc)
-#ifdef __INTEL_COMPILER
+#if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2)
 #else
 !$omp do collapse(2)
@@ -726,7 +726,7 @@ CONTAINS
     m2 = SIZE(dest, 2)
 
     !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(2) IF(lzacc)
-#ifdef __INTEL_COMPILER
+#if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2)
 #else
 !$omp do collapse(2)
@@ -864,7 +864,7 @@ CONTAINS
     m2 = SIZE(dest, 2)
 
     !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(2) IF(lzacc)
-#ifdef __INTEL_COMPILER
+#if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2)
 #else
 !$omp do collapse(2)
-- 
GitLab


From 7bf36f9dc28c03176f302395bf74d037c753c54f Mon Sep 17 00:00:00 2001
From: Dylan Kierans <kierans@dkrz.de>
Date: Tue, 4 Mar 2025 16:50:28 +0100
Subject: [PATCH 09/10] Adding 4d+5d versions of copy dp to sp

---
 src/mo_fortran_tools.F90 | 77 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/src/mo_fortran_tools.F90 b/src/mo_fortran_tools.F90
index eefa54b..3509e70 100644
--- a/src/mo_fortran_tools.F90
+++ b/src/mo_fortran_tools.F90
@@ -167,6 +167,8 @@ MODULE mo_fortran_tools
     MODULE PROCEDURE copy_5d_sp
     MODULE PROCEDURE copy_2d_dpsp
     MODULE PROCEDURE copy_3d_dpsp
+    MODULE PROCEDURE copy_4d_dpsp
+    MODULE PROCEDURE copy_5d_dpsp
     MODULE PROCEDURE copy_2d_spdp
     MODULE PROCEDURE copy_3d_spdp
     MODULE PROCEDURE copy_4d_spdp
@@ -912,6 +914,81 @@ CONTAINS
 
   END SUBROUTINE copy_3d_dpsp
 
+  !> copy state, omp parallel, does not wait for other threads to complete
+  SUBROUTINE copy_4d_dpsp(src, dest, lacc, opt_acc_async)
+    REAL(dp), INTENT(IN) :: src(:, :, :, :)
+    REAL(sp), INTENT(OUT) :: dest(:, :, :, :)
+    LOGICAL, INTENT(IN) :: lacc
+    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER :: i1, i2, i3, i4, m1, m2, m3, m4
+    LOGICAL :: lzacc
+
+    CALL set_acc_host_or_device(lzacc, lacc)
+
+    m1 = SIZE(dest, 1)
+    m2 = SIZE(dest, 2)
+    m3 = SIZE(dest, 3)
+    m4 = SIZE(dest, 4)
+
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(4) IF(lzacc)
+#if (defined(__INTEL_COMPILER))
+!$omp do private(i1,i2,i3,i4)
+#else
+!$omp do collapse(4)
+#endif
+    DO i4 = 1, m4
+      DO i3 = 1, m3
+        DO i2 = 1, m2
+          DO i1 = 1, m1
+            dest(i1, i2, i3, i4) = REAL(src(i1, i2, i3, i4), KIND=sp)
+          END DO
+        END DO
+      END DO
+    END DO
+!$omp end do nowait
+    CALL acc_wait_if_requested(1, opt_acc_async)
+
+  END SUBROUTINE copy_4d_dpsp
+
+  !> copy state, omp parallel, does not wait for other threads to complete
+  SUBROUTINE copy_5d_dpsp(src, dest, lacc, opt_acc_async)
+    REAL(dp), INTENT(IN) :: src(:, :, :, :, :)
+    REAL(sp), INTENT(OUT) :: dest(:, :, :, :, :)
+    LOGICAL, INTENT(IN) :: lacc
+    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER :: i1, i2, i3, i4, i5, m1, m2, m3, m4, m5
+    LOGICAL :: lzacc
+
+    CALL set_acc_host_or_device(lzacc, lacc)
+
+    m1 = SIZE(dest, 1)
+    m2 = SIZE(dest, 2)
+    m3 = SIZE(dest, 3)
+    m4 = SIZE(dest, 4)
+    m5 = SIZE(dest, 5)
+
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(5) IF(lzacc)
+#if (defined(__INTEL_COMPILER))
+!$omp do private(i1,i2,i3,i4,i5)
+#else
+!$omp do collapse(5)
+#endif
+    DO i5 = 1, m5
+      DO i4 = 1, m4
+        DO i3 = 1, m3
+          DO i2 = 1, m2
+            DO i1 = 1, m1
+              dest(i1, i2, i3, i4, i5) = REAL(src(i1, i2, i3, i4, i5), KIND=sp)
+            END DO
+          END DO
+        END DO
+      END DO
+    END DO
+!$omp end do nowait
+
+    CALL acc_wait_if_requested(1, opt_acc_async)
+  END SUBROUTINE copy_5d_dpsp
+
   !> copy state, omp parallel, does not wait for other threads to complete
   SUBROUTINE copy_2d_spdp(src, dest, lacc, opt_acc_async)
     REAL(sp), INTENT(IN) :: src(:, :)
-- 
GitLab


From 28087959e86d6e03c4f0c0377445a3080f877aa1 Mon Sep 17 00:00:00 2001
From: Dylan Kierans <kierans@dkrz.de>
Date: Tue, 4 Mar 2025 16:56:06 +0100
Subject: [PATCH 10/10] Tests for copy 4d/5d

---
 test/fortran/test_fortran_tools.f90 | 32 +++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/test/fortran/test_fortran_tools.f90 b/test/fortran/test_fortran_tools.f90
index a3c9a56..6ff974a 100644
--- a/test/fortran/test_fortran_tools.f90
+++ b/test/fortran/test_fortran_tools.f90
@@ -442,6 +442,34 @@ CONTAINS
     CALL ASSERT_EQUAL(assert_real_spdp_3d_array(dest, src), .TRUE.)
   END SUBROUTINE
 
+  SUBROUTINE Test_copy_4d_dpsp
+    REAL(dp) :: src(5, 5, 5, 5) = 1.0
+    REAL(sp) :: dest(5, 5, 5, 5)
+
+    CALL TAG_TEST("Test_copy_4d_dpsp_ones")
+    CALL copy(src, dest, .FALSE.)
+    CALL ASSERT_EQUAL(assert_real_spdp_4d_array(dest, src), .TRUE.)
+
+    CALL RANDOM_NUMBER(src)
+    CALL TAG_TEST("Test_copy_4d_dpsp_random")
+    CALL copy(src, dest, .FALSE.)
+    CALL ASSERT_EQUAL(assert_real_spdp_4d_array(dest, src), .TRUE.)
+  END SUBROUTINE
+
+  SUBROUTINE Test_copy_5d_dpsp
+    REAL(dp) :: src(5, 5, 5, 5, 5) = 1.0
+    REAL(sp) :: dest(5, 5, 5, 5, 5)
+
+    CALL TAG_TEST("Test_copy_5d_dpsp_ones")
+    CALL copy(src, dest, .FALSE.)
+    CALL ASSERT_EQUAL(assert_real_spdp_5d_array(dest, src), .TRUE.)
+
+    CALL RANDOM_NUMBER(src)
+    CALL TAG_TEST("Test_copy_5d_dpsp_random")
+    CALL copy(src, dest, .FALSE.)
+    CALL ASSERT_EQUAL(assert_real_spdp_5d_array(dest, src), .TRUE.)
+  END SUBROUTINE
+
   SUBROUTINE Test_copy_2d_spdp
     REAL(sp) :: src(10, 10) = 1.0
     REAL(dp) :: dest(10, 10)
@@ -1909,7 +1937,7 @@ CONTAINS
       DO j = 1, SIZE(array1, 2)
         DO k = 1, SIZE(array1, 3)
           DO l = 1, SIZE(array1, 4)
-            IF (array1(i, j, k, l) /= array2(i, j, k, l)) THEN
+            IF (array1(i, j, k, l) /= REAL(array2(i, j, k, l), KIND=sp)) THEN
               assert_real_spdp_4d_array = .FALSE.
               EXIT
             END IF
@@ -1930,7 +1958,7 @@ CONTAINS
         DO k = 1, SIZE(array1, 3)
           DO l = 1, SIZE(array1, 4)
             DO m = 1, SIZE(array1, 5)
-              IF (array1(i, j, k, l, m) /= array2(i, j, k, l, m)) THEN
+              IF (array1(i, j, k, l, m) /= REAL(array2(i, j, k, l, m), KIND=sp)) THEN
                 assert_real_spdp_5d_array = .FALSE.
                 EXIT
               END IF
-- 
GitLab