From 2cbccd735f2b4fa14bf5e29a7982d5e4f7cbc3d1 Mon Sep 17 00:00:00 2001 From: Daniel Reinert <daniel.reinert@dwd.de> Date: Mon, 4 Nov 2024 14:11:16 +0000 Subject: [PATCH] Performance optimization for NEC SX AURORA (icon-libraries/libfortran-support!97) The subroutines `init_zero_4d_[dp,sp,i4]` do not vectorize properly on NEC SX AURORA. Loop collapsing is enforced by a compiler directive for the subroutines `init_zero_4d_[dp,sp,i4]`, in order to ensure proper vectorization on NEC SX AURORA. Approved-by: Yen-Chen Chen <yen-chen.chen@kit.edu> Merged-by: Yen-Chen Chen <yen-chen.chen@kit.edu> Changelog: feature --- src/mo_fortran_tools.F90 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mo_fortran_tools.F90 b/src/mo_fortran_tools.F90 index 6f9fbe4..74e7ec7 100644 --- a/src/mo_fortran_tools.F90 +++ b/src/mo_fortran_tools.F90 @@ -1201,6 +1201,7 @@ CONTAINS #else !$omp do collapse(4) #endif +!NEC$ forced_collapse DO i4 = 1, m4 DO i3 = 1, m3 DO i2 = 1, m2 @@ -1235,6 +1236,7 @@ CONTAINS #else !$omp do collapse(4) #endif +!NEC$ forced_collapse DO i4 = 1, m4 DO i3 = 1, m3 DO i2 = 1, m2 @@ -1269,6 +1271,7 @@ CONTAINS #else !$omp do collapse(4) #endif +!NEC$ forced_collapse DO i4 = 1, m4 DO i3 = 1, m3 DO i2 = 1, m2 -- GitLab