From e21ca60f642a1af012049b1c324463f17f636e91 Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: Tue, 4 Mar 2025 16:33:35 +0000 Subject: [PATCH] Variant of copy for 2d dpsp (icon-libraries/libfortran-support!113) ## What is the new feature New copy variants for 2d,4d,5d fields from double-precision to single-precision. Co-authored-by: Yen-Chen Chen <yen-chen.chen@tum.de> Approved-by: Yen-Chen Chen <yen-chen.chen@tum.de> Merged-by: Yen-Chen Chen <yen-chen.chen@tum.de> Changelog: feature --- src/mo_fortran_tools.F90 | 112 +++++++++++++++++++++++++++- test/fortran/test_fortran_tools.f90 | 64 +++++++++++++++- 2 files changed, 170 insertions(+), 6 deletions(-) diff --git a/src/mo_fortran_tools.F90 b/src/mo_fortran_tools.F90 index 204ff1f..3509e70 100644 --- a/src/mo_fortran_tools.F90 +++ b/src/mo_fortran_tools.F90 @@ -165,7 +165,10 @@ MODULE mo_fortran_tools MODULE PROCEDURE copy_3d_sp MODULE PROCEDURE copy_4d_sp MODULE PROCEDURE copy_5d_sp + MODULE PROCEDURE copy_2d_dpsp MODULE PROCEDURE copy_3d_dpsp + MODULE PROCEDURE copy_4d_dpsp + MODULE PROCEDURE copy_5d_dpsp MODULE PROCEDURE copy_2d_spdp MODULE PROCEDURE copy_3d_spdp MODULE PROCEDURE copy_4d_spdp @@ -563,7 +566,7 @@ CONTAINS m2 = SIZE(dest, 2) !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(2) IF(lzacc) -#ifdef __INTEL_COMPILER +#if (defined(__INTEL_COMPILER)) !$omp do private(i1,i2) #else !$omp do collapse(2) @@ -725,7 +728,7 @@ CONTAINS m2 = SIZE(dest, 2) !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(2) IF(lzacc) -#ifdef __INTEL_COMPILER +#if (defined(__INTEL_COMPILER)) !$omp do private(i1,i2) #else !$omp do collapse(2) @@ -848,6 +851,36 @@ CONTAINS CALL acc_wait_if_requested(1, opt_acc_async) END SUBROUTINE copy_5d_sp + !> copy state, omp parallel, does not wait for other threads to complete + SUBROUTINE copy_2d_dpsp(src, dest, lacc, opt_acc_async) + REAL(dp), INTENT(IN) :: src(:, :) + REAL(sp), INTENT(OUT) :: dest(:, :) + LOGICAL, INTENT(IN) :: lacc + LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async + INTEGER :: i1, i2, m1, m2 + LOGICAL :: lzacc + + CALL set_acc_host_or_device(lzacc, lacc) + + m1 = SIZE(dest, 1) + m2 = SIZE(dest, 2) + + !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(2) IF(lzacc) +#if (defined(__INTEL_COMPILER)) +!$omp do private(i1,i2) +#else +!$omp do collapse(2) +#endif + DO i2 = 1, m2 + DO i1 = 1, m1 + dest(i1, i2) = REAL(src(i1, i2), KIND=sp) + END DO + END DO +!$omp end do nowait + CALL acc_wait_if_requested(1, opt_acc_async) + + END SUBROUTINE copy_2d_dpsp + !> copy state, omp parallel, does not wait for other threads to complete SUBROUTINE copy_3d_dpsp(src, dest, lacc, opt_acc_async) REAL(dp), INTENT(IN) :: src(:, :, :) @@ -881,6 +914,81 @@ CONTAINS END SUBROUTINE copy_3d_dpsp + !> copy state, omp parallel, does not wait for other threads to complete + SUBROUTINE copy_4d_dpsp(src, dest, lacc, opt_acc_async) + REAL(dp), INTENT(IN) :: src(:, :, :, :) + REAL(sp), INTENT(OUT) :: dest(:, :, :, :) + LOGICAL, INTENT(IN) :: lacc + LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async + INTEGER :: i1, i2, i3, i4, m1, m2, m3, m4 + LOGICAL :: lzacc + + CALL set_acc_host_or_device(lzacc, lacc) + + m1 = SIZE(dest, 1) + m2 = SIZE(dest, 2) + m3 = SIZE(dest, 3) + m4 = SIZE(dest, 4) + + !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(4) IF(lzacc) +#if (defined(__INTEL_COMPILER)) +!$omp do private(i1,i2,i3,i4) +#else +!$omp do collapse(4) +#endif + DO i4 = 1, m4 + DO i3 = 1, m3 + DO i2 = 1, m2 + DO i1 = 1, m1 + dest(i1, i2, i3, i4) = REAL(src(i1, i2, i3, i4), KIND=sp) + END DO + END DO + END DO + END DO +!$omp end do nowait + CALL acc_wait_if_requested(1, opt_acc_async) + + END SUBROUTINE copy_4d_dpsp + + !> copy state, omp parallel, does not wait for other threads to complete + SUBROUTINE copy_5d_dpsp(src, dest, lacc, opt_acc_async) + REAL(dp), INTENT(IN) :: src(:, :, :, :, :) + REAL(sp), INTENT(OUT) :: dest(:, :, :, :, :) + LOGICAL, INTENT(IN) :: lacc + LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async + INTEGER :: i1, i2, i3, i4, i5, m1, m2, m3, m4, m5 + LOGICAL :: lzacc + + CALL set_acc_host_or_device(lzacc, lacc) + + m1 = SIZE(dest, 1) + m2 = SIZE(dest, 2) + m3 = SIZE(dest, 3) + m4 = SIZE(dest, 4) + m5 = SIZE(dest, 5) + + !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(5) IF(lzacc) +#if (defined(__INTEL_COMPILER)) +!$omp do private(i1,i2,i3,i4,i5) +#else +!$omp do collapse(5) +#endif + DO i5 = 1, m5 + DO i4 = 1, m4 + DO i3 = 1, m3 + DO i2 = 1, m2 + DO i1 = 1, m1 + dest(i1, i2, i3, i4, i5) = REAL(src(i1, i2, i3, i4, i5), KIND=sp) + END DO + END DO + END DO + END DO + END DO +!$omp end do nowait + + CALL acc_wait_if_requested(1, opt_acc_async) + END SUBROUTINE copy_5d_dpsp + !> copy state, omp parallel, does not wait for other threads to complete SUBROUTINE copy_2d_spdp(src, dest, lacc, opt_acc_async) REAL(sp), INTENT(IN) :: src(:, :) diff --git a/test/fortran/test_fortran_tools.f90 b/test/fortran/test_fortran_tools.f90 index 36012a2..6ff974a 100644 --- a/test/fortran/test_fortran_tools.f90 +++ b/test/fortran/test_fortran_tools.f90 @@ -414,6 +414,62 @@ CONTAINS CALL ASSERT_EQUAL(assert_real_sp_5d_array(src, dest), .TRUE.) END SUBROUTINE + SUBROUTINE Test_copy_2d_dpsp + REAL(dp) :: src(10, 8) = 1.0 + REAL(sp) :: dest(10, 8) + + CALL TAG_TEST("Test_copy_2d_dpsp_ones") + CALL copy(src, dest, .FALSE.) + CALL ASSERT_EQUAL(assert_real_spdp_2d_array(dest, src), .TRUE.) + + CALL RANDOM_NUMBER(src) + CALL TAG_TEST("Test_copy_2d_dpsp_random") + CALL copy(src, dest, .FALSE.) + CALL ASSERT_EQUAL(assert_real_spdp_2d_array(dest, src), .TRUE.) + END SUBROUTINE + + SUBROUTINE Test_copy_3d_dpsp + REAL(dp) :: src(10, 8, 6) = 1.0 + REAL(sp) :: dest(10, 8, 6) + + CALL TAG_TEST("Test_copy_3d_dpsp_ones") + CALL copy(src, dest, .FALSE.) + CALL ASSERT_EQUAL(assert_real_spdp_3d_array(dest, src), .TRUE.) + + CALL RANDOM_NUMBER(src) + CALL TAG_TEST("Test_copy_3d_dpsp_random") + CALL copy(src, dest, .FALSE.) + CALL ASSERT_EQUAL(assert_real_spdp_3d_array(dest, src), .TRUE.) + END SUBROUTINE + + SUBROUTINE Test_copy_4d_dpsp + REAL(dp) :: src(5, 5, 5, 5) = 1.0 + REAL(sp) :: dest(5, 5, 5, 5) + + CALL TAG_TEST("Test_copy_4d_dpsp_ones") + CALL copy(src, dest, .FALSE.) + CALL ASSERT_EQUAL(assert_real_spdp_4d_array(dest, src), .TRUE.) + + CALL RANDOM_NUMBER(src) + CALL TAG_TEST("Test_copy_4d_dpsp_random") + CALL copy(src, dest, .FALSE.) + CALL ASSERT_EQUAL(assert_real_spdp_4d_array(dest, src), .TRUE.) + END SUBROUTINE + + SUBROUTINE Test_copy_5d_dpsp + REAL(dp) :: src(5, 5, 5, 5, 5) = 1.0 + REAL(sp) :: dest(5, 5, 5, 5, 5) + + CALL TAG_TEST("Test_copy_5d_dpsp_ones") + CALL copy(src, dest, .FALSE.) + CALL ASSERT_EQUAL(assert_real_spdp_5d_array(dest, src), .TRUE.) + + CALL RANDOM_NUMBER(src) + CALL TAG_TEST("Test_copy_5d_dpsp_random") + CALL copy(src, dest, .FALSE.) + CALL ASSERT_EQUAL(assert_real_spdp_5d_array(dest, src), .TRUE.) + END SUBROUTINE + SUBROUTINE Test_copy_2d_spdp REAL(sp) :: src(10, 10) = 1.0 REAL(dp) :: dest(10, 10) @@ -1845,7 +1901,7 @@ CONTAINS assert_real_spdp_2d_array = .TRUE. DO i = 1, SIZE(array1, 1) DO j = 1, SIZE(array1, 2) - IF (array1(i, j) /= array2(i, j)) THEN + IF (array1(i, j) /= REAL(array2(i, j), KIND=sp)) THEN assert_real_spdp_2d_array = .FALSE. EXIT END IF @@ -1862,7 +1918,7 @@ CONTAINS DO i = 1, SIZE(array1, 1) DO j = 1, SIZE(array1, 2) DO k = 1, SIZE(array1, 3) - IF (array1(i, j, k) /= array2(i, j, k)) THEN + IF (array1(i, j, k) /= REAL(array2(i, j, k), KIND=sp)) THEN assert_real_spdp_3d_array = .FALSE. EXIT END IF @@ -1881,7 +1937,7 @@ CONTAINS DO j = 1, SIZE(array1, 2) DO k = 1, SIZE(array1, 3) DO l = 1, SIZE(array1, 4) - IF (array1(i, j, k, l) /= array2(i, j, k, l)) THEN + IF (array1(i, j, k, l) /= REAL(array2(i, j, k, l), KIND=sp)) THEN assert_real_spdp_4d_array = .FALSE. EXIT END IF @@ -1902,7 +1958,7 @@ CONTAINS DO k = 1, SIZE(array1, 3) DO l = 1, SIZE(array1, 4) DO m = 1, SIZE(array1, 5) - IF (array1(i, j, k, l, m) /= array2(i, j, k, l, m)) THEN + IF (array1(i, j, k, l, m) /= REAL(array2(i, j, k, l, m), KIND=sp)) THEN assert_real_spdp_5d_array = .FALSE. EXIT END IF -- GitLab