From e21ca60f642a1af012049b1c324463f17f636e91 Mon Sep 17 00:00:00 2001
From: Dylan Kierans <kierans@dkrz.de>
Date: Tue, 4 Mar 2025 16:33:35 +0000
Subject: [PATCH] Variant of copy for 2d dpsp
 (icon-libraries/libfortran-support!113)

## What is the new feature
New copy variants for 2d,4d,5d fields from double-precision to single-precision.

Co-authored-by: Yen-Chen Chen <yen-chen.chen@tum.de>
Approved-by: Yen-Chen Chen <yen-chen.chen@tum.de>
Merged-by: Yen-Chen Chen <yen-chen.chen@tum.de>
Changelog: feature
---
 src/mo_fortran_tools.F90            | 112 +++++++++++++++++++++++++++-
 test/fortran/test_fortran_tools.f90 |  64 +++++++++++++++-
 2 files changed, 170 insertions(+), 6 deletions(-)

diff --git a/src/mo_fortran_tools.F90 b/src/mo_fortran_tools.F90
index 204ff1f..3509e70 100644
--- a/src/mo_fortran_tools.F90
+++ b/src/mo_fortran_tools.F90
@@ -165,7 +165,10 @@ MODULE mo_fortran_tools
     MODULE PROCEDURE copy_3d_sp
     MODULE PROCEDURE copy_4d_sp
     MODULE PROCEDURE copy_5d_sp
+    MODULE PROCEDURE copy_2d_dpsp
     MODULE PROCEDURE copy_3d_dpsp
+    MODULE PROCEDURE copy_4d_dpsp
+    MODULE PROCEDURE copy_5d_dpsp
     MODULE PROCEDURE copy_2d_spdp
     MODULE PROCEDURE copy_3d_spdp
     MODULE PROCEDURE copy_4d_spdp
@@ -563,7 +566,7 @@ CONTAINS
     m2 = SIZE(dest, 2)
 
     !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(2) IF(lzacc)
-#ifdef __INTEL_COMPILER
+#if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2)
 #else
 !$omp do collapse(2)
@@ -725,7 +728,7 @@ CONTAINS
     m2 = SIZE(dest, 2)
 
     !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(2) IF(lzacc)
-#ifdef __INTEL_COMPILER
+#if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2)
 #else
 !$omp do collapse(2)
@@ -848,6 +851,36 @@ CONTAINS
     CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE copy_5d_sp
 
+  !> copy state, omp parallel, does not wait for other threads to complete
+  SUBROUTINE copy_2d_dpsp(src, dest, lacc, opt_acc_async)
+    REAL(dp), INTENT(IN) :: src(:, :)
+    REAL(sp), INTENT(OUT) :: dest(:, :)
+    LOGICAL, INTENT(IN) :: lacc
+    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER :: i1, i2, m1, m2
+    LOGICAL :: lzacc
+
+    CALL set_acc_host_or_device(lzacc, lacc)
+
+    m1 = SIZE(dest, 1)
+    m2 = SIZE(dest, 2)
+
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(2) IF(lzacc)
+#if (defined(__INTEL_COMPILER))
+!$omp do private(i1,i2)
+#else
+!$omp do collapse(2)
+#endif
+    DO i2 = 1, m2
+      DO i1 = 1, m1
+        dest(i1, i2) = REAL(src(i1, i2), KIND=sp)
+      END DO
+    END DO
+!$omp end do nowait
+    CALL acc_wait_if_requested(1, opt_acc_async)
+
+  END SUBROUTINE copy_2d_dpsp
+
   !> copy state, omp parallel, does not wait for other threads to complete
   SUBROUTINE copy_3d_dpsp(src, dest, lacc, opt_acc_async)
     REAL(dp), INTENT(IN) :: src(:, :, :)
@@ -881,6 +914,81 @@ CONTAINS
 
   END SUBROUTINE copy_3d_dpsp
 
+  !> copy state, omp parallel, does not wait for other threads to complete
+  SUBROUTINE copy_4d_dpsp(src, dest, lacc, opt_acc_async)
+    REAL(dp), INTENT(IN) :: src(:, :, :, :)
+    REAL(sp), INTENT(OUT) :: dest(:, :, :, :)
+    LOGICAL, INTENT(IN) :: lacc
+    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER :: i1, i2, i3, i4, m1, m2, m3, m4
+    LOGICAL :: lzacc
+
+    CALL set_acc_host_or_device(lzacc, lacc)
+
+    m1 = SIZE(dest, 1)
+    m2 = SIZE(dest, 2)
+    m3 = SIZE(dest, 3)
+    m4 = SIZE(dest, 4)
+
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(4) IF(lzacc)
+#if (defined(__INTEL_COMPILER))
+!$omp do private(i1,i2,i3,i4)
+#else
+!$omp do collapse(4)
+#endif
+    DO i4 = 1, m4
+      DO i3 = 1, m3
+        DO i2 = 1, m2
+          DO i1 = 1, m1
+            dest(i1, i2, i3, i4) = REAL(src(i1, i2, i3, i4), KIND=sp)
+          END DO
+        END DO
+      END DO
+    END DO
+!$omp end do nowait
+    CALL acc_wait_if_requested(1, opt_acc_async)
+
+  END SUBROUTINE copy_4d_dpsp
+
+  !> copy state, omp parallel, does not wait for other threads to complete
+  SUBROUTINE copy_5d_dpsp(src, dest, lacc, opt_acc_async)
+    REAL(dp), INTENT(IN) :: src(:, :, :, :, :)
+    REAL(sp), INTENT(OUT) :: dest(:, :, :, :, :)
+    LOGICAL, INTENT(IN) :: lacc
+    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER :: i1, i2, i3, i4, i5, m1, m2, m3, m4, m5
+    LOGICAL :: lzacc
+
+    CALL set_acc_host_or_device(lzacc, lacc)
+
+    m1 = SIZE(dest, 1)
+    m2 = SIZE(dest, 2)
+    m3 = SIZE(dest, 3)
+    m4 = SIZE(dest, 4)
+    m5 = SIZE(dest, 5)
+
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(5) IF(lzacc)
+#if (defined(__INTEL_COMPILER))
+!$omp do private(i1,i2,i3,i4,i5)
+#else
+!$omp do collapse(5)
+#endif
+    DO i5 = 1, m5
+      DO i4 = 1, m4
+        DO i3 = 1, m3
+          DO i2 = 1, m2
+            DO i1 = 1, m1
+              dest(i1, i2, i3, i4, i5) = REAL(src(i1, i2, i3, i4, i5), KIND=sp)
+            END DO
+          END DO
+        END DO
+      END DO
+    END DO
+!$omp end do nowait
+
+    CALL acc_wait_if_requested(1, opt_acc_async)
+  END SUBROUTINE copy_5d_dpsp
+
   !> copy state, omp parallel, does not wait for other threads to complete
   SUBROUTINE copy_2d_spdp(src, dest, lacc, opt_acc_async)
     REAL(sp), INTENT(IN) :: src(:, :)
diff --git a/test/fortran/test_fortran_tools.f90 b/test/fortran/test_fortran_tools.f90
index 36012a2..6ff974a 100644
--- a/test/fortran/test_fortran_tools.f90
+++ b/test/fortran/test_fortran_tools.f90
@@ -414,6 +414,62 @@ CONTAINS
     CALL ASSERT_EQUAL(assert_real_sp_5d_array(src, dest), .TRUE.)
   END SUBROUTINE
 
+  SUBROUTINE Test_copy_2d_dpsp
+    REAL(dp) :: src(10, 8) = 1.0
+    REAL(sp) :: dest(10, 8)
+
+    CALL TAG_TEST("Test_copy_2d_dpsp_ones")
+    CALL copy(src, dest, .FALSE.)
+    CALL ASSERT_EQUAL(assert_real_spdp_2d_array(dest, src), .TRUE.)
+
+    CALL RANDOM_NUMBER(src)
+    CALL TAG_TEST("Test_copy_2d_dpsp_random")
+    CALL copy(src, dest, .FALSE.)
+    CALL ASSERT_EQUAL(assert_real_spdp_2d_array(dest, src), .TRUE.)
+  END SUBROUTINE
+
+  SUBROUTINE Test_copy_3d_dpsp
+    REAL(dp) :: src(10, 8, 6) = 1.0
+    REAL(sp) :: dest(10, 8, 6)
+
+    CALL TAG_TEST("Test_copy_3d_dpsp_ones")
+    CALL copy(src, dest, .FALSE.)
+    CALL ASSERT_EQUAL(assert_real_spdp_3d_array(dest, src), .TRUE.)
+
+    CALL RANDOM_NUMBER(src)
+    CALL TAG_TEST("Test_copy_3d_dpsp_random")
+    CALL copy(src, dest, .FALSE.)
+    CALL ASSERT_EQUAL(assert_real_spdp_3d_array(dest, src), .TRUE.)
+  END SUBROUTINE
+
+  SUBROUTINE Test_copy_4d_dpsp
+    REAL(dp) :: src(5, 5, 5, 5) = 1.0
+    REAL(sp) :: dest(5, 5, 5, 5)
+
+    CALL TAG_TEST("Test_copy_4d_dpsp_ones")
+    CALL copy(src, dest, .FALSE.)
+    CALL ASSERT_EQUAL(assert_real_spdp_4d_array(dest, src), .TRUE.)
+
+    CALL RANDOM_NUMBER(src)
+    CALL TAG_TEST("Test_copy_4d_dpsp_random")
+    CALL copy(src, dest, .FALSE.)
+    CALL ASSERT_EQUAL(assert_real_spdp_4d_array(dest, src), .TRUE.)
+  END SUBROUTINE
+
+  SUBROUTINE Test_copy_5d_dpsp
+    REAL(dp) :: src(5, 5, 5, 5, 5) = 1.0
+    REAL(sp) :: dest(5, 5, 5, 5, 5)
+
+    CALL TAG_TEST("Test_copy_5d_dpsp_ones")
+    CALL copy(src, dest, .FALSE.)
+    CALL ASSERT_EQUAL(assert_real_spdp_5d_array(dest, src), .TRUE.)
+
+    CALL RANDOM_NUMBER(src)
+    CALL TAG_TEST("Test_copy_5d_dpsp_random")
+    CALL copy(src, dest, .FALSE.)
+    CALL ASSERT_EQUAL(assert_real_spdp_5d_array(dest, src), .TRUE.)
+  END SUBROUTINE
+
   SUBROUTINE Test_copy_2d_spdp
     REAL(sp) :: src(10, 10) = 1.0
     REAL(dp) :: dest(10, 10)
@@ -1845,7 +1901,7 @@ CONTAINS
     assert_real_spdp_2d_array = .TRUE.
     DO i = 1, SIZE(array1, 1)
       DO j = 1, SIZE(array1, 2)
-        IF (array1(i, j) /= array2(i, j)) THEN
+        IF (array1(i, j) /= REAL(array2(i, j), KIND=sp)) THEN
           assert_real_spdp_2d_array = .FALSE.
           EXIT
         END IF
@@ -1862,7 +1918,7 @@ CONTAINS
     DO i = 1, SIZE(array1, 1)
       DO j = 1, SIZE(array1, 2)
         DO k = 1, SIZE(array1, 3)
-          IF (array1(i, j, k) /= array2(i, j, k)) THEN
+          IF (array1(i, j, k) /= REAL(array2(i, j, k), KIND=sp)) THEN
             assert_real_spdp_3d_array = .FALSE.
             EXIT
           END IF
@@ -1881,7 +1937,7 @@ CONTAINS
       DO j = 1, SIZE(array1, 2)
         DO k = 1, SIZE(array1, 3)
           DO l = 1, SIZE(array1, 4)
-            IF (array1(i, j, k, l) /= array2(i, j, k, l)) THEN
+            IF (array1(i, j, k, l) /= REAL(array2(i, j, k, l), KIND=sp)) THEN
               assert_real_spdp_4d_array = .FALSE.
               EXIT
             END IF
@@ -1902,7 +1958,7 @@ CONTAINS
         DO k = 1, SIZE(array1, 3)
           DO l = 1, SIZE(array1, 4)
             DO m = 1, SIZE(array1, 5)
-              IF (array1(i, j, k, l, m) /= array2(i, j, k, l, m)) THEN
+              IF (array1(i, j, k, l, m) /= REAL(array2(i, j, k, l, m), KIND=sp)) THEN
                 assert_real_spdp_5d_array = .FALSE.
                 EXIT
               END IF
-- 
GitLab