diff --git a/src/mo_fortran_tools.F90 b/src/mo_fortran_tools.F90
index 708296f6be9f2258417571dd56489527e9b162bf..8079c1834e3e888297f84a28c432c980eaf5deb1 100644
--- a/src/mo_fortran_tools.F90
+++ b/src/mo_fortran_tools.F90
@@ -54,6 +54,7 @@ MODULE mo_fortran_tools
   PUBLIC :: assert_acc_host_only
   PUBLIC :: assert_acc_device_only
   PUBLIC :: set_acc_host_or_device
+  PUBLIC :: set_acc_async_queue
 
   PRIVATE
 
@@ -511,19 +512,21 @@ CONTAINS
   END SUBROUTINE resize_arr_c1d
 
   !> copy state, omp parallel, does not wait for other threads to complete
-  SUBROUTINE copy_1d_dp(src, dest, lacc, opt_acc_async)
+  SUBROUTINE copy_1d_dp(src, dest, lacc, opt_acc_async_queue)
     REAL(dp), INTENT(IN) :: src(:)
     REAL(dp), INTENT(OUT) :: dest(:)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, m1
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(dest, 1)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) IF(lzacc)
     !$omp do private(i1)
     DO i1 = 1, m1
       dest(i1) = src(i1)
@@ -531,24 +534,25 @@ CONTAINS
     !$omp end do nowait
     !$ACC END PARALLEL LOOP
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE copy_1d_dp
 
   !> copy state, omp parallel, does not wait for other threads to complete
-  SUBROUTINE copy_2d_dp(src, dest, lacc, opt_acc_async)
+  SUBROUTINE copy_2d_dp(src, dest, lacc, opt_acc_async_queue)
     REAL(dp), INTENT(IN) :: src(:, :)
     REAL(dp), INTENT(OUT) :: dest(:, :)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, i2, m1, m2
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(dest, 1)
     m2 = SIZE(dest, 2)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(2) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(2) IF(lzacc)
 #ifdef __INTEL_COMPILER
 !$omp do private(i1,i2)
 #else
@@ -560,26 +564,27 @@ CONTAINS
       END DO
     END DO
 !$omp end do nowait
-    CALL acc_wait_if_requested(1, opt_acc_async)
 
   END SUBROUTINE copy_2d_dp
 
   !> copy state, omp parallel, does not wait for other threads to complete
-  SUBROUTINE copy_3d_dp(src, dest, lacc, opt_acc_async)
+  SUBROUTINE copy_3d_dp(src, dest, lacc, opt_acc_async_queue)
     REAL(dp), INTENT(IN) :: src(:, :, :)
     REAL(dp), INTENT(OUT) :: dest(:, :, :)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, i2, i3, m1, m2, m3
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(dest, 1)
     m2 = SIZE(dest, 2)
     m3 = SIZE(dest, 3)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(3) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(3) IF(lzacc)
 #if (defined(_CRAYFTN) || defined(__INTEL_COMPILER))
 !$omp do private(i1,i2,i3)
 #else
@@ -594,26 +599,27 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE copy_3d_dp
 
   !> copy state, omp parallel, does not wait for other threads to complete
-  SUBROUTINE copy_4d_dp(src, dest, lacc, opt_acc_async)
+  SUBROUTINE copy_4d_dp(src, dest, lacc, opt_acc_async_queue)
     REAL(dp), INTENT(IN) :: src(:, :, :, :)
     REAL(dp), INTENT(OUT) :: dest(:, :, :, :)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, i2, i3, i4, m1, m2, m3, m4
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(dest, 1)
     m2 = SIZE(dest, 2)
     m3 = SIZE(dest, 3)
     m4 = SIZE(dest, 4)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(4) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(4) IF(lzacc)
 #if (defined(_CRAYFTN) || defined(__INTEL_COMPILER))
 !$omp do private(i1,i2,i3,i4)
 #else
@@ -630,19 +636,20 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE copy_4d_dp
 
   !> copy state, omp parallel, does not wait for other threads to complete
-  SUBROUTINE copy_5d_dp(src, dest, lacc, opt_acc_async)
+  SUBROUTINE copy_5d_dp(src, dest, lacc, opt_acc_async_queue)
     REAL(dp), INTENT(IN) :: src(:, :, :, :, :)
     REAL(dp), INTENT(OUT) :: dest(:, :, :, :, :)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, i2, i3, i4, i5, m1, m2, m3, m4, m5
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(dest, 1)
     m2 = SIZE(dest, 2)
@@ -650,7 +657,7 @@ CONTAINS
     m4 = SIZE(dest, 4)
     m5 = SIZE(dest, 5)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(5) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(5) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2,i3,i4,i5)
 #else
@@ -669,19 +676,20 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE copy_5d_dp
 
   !> copy state, omp parallel, does not wait for other threads to complete
-  SUBROUTINE copy_5d_sp(src, dest, lacc, opt_acc_async)
+  SUBROUTINE copy_5d_sp(src, dest, lacc, opt_acc_async_queue)
     REAL(sp), INTENT(IN) :: src(:, :, :, :, :)
     REAL(sp), INTENT(OUT) :: dest(:, :, :, :, :)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, i2, i3, i4, i5, m1, m2, m3, m4, m5
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(dest, 1)
     m2 = SIZE(dest, 2)
@@ -689,7 +697,7 @@ CONTAINS
     m4 = SIZE(dest, 4)
     m5 = SIZE(dest, 5)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(5) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(5) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2,i3,i4,i5)
 #else
@@ -708,24 +716,25 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE copy_5d_sp
 
   !> copy state, omp parallel, does not wait for other threads to complete
-  SUBROUTINE copy_2d_spdp(src, dest, lacc, opt_acc_async)
+  SUBROUTINE copy_2d_spdp(src, dest, lacc, opt_acc_async_queue)
     REAL(sp), INTENT(IN) :: src(:, :)
     REAL(dp), INTENT(OUT) :: dest(:, :)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, i2, m1, m2
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(dest, 1)
     m2 = SIZE(dest, 2)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(2) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(2) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2)
 #else
@@ -737,26 +746,27 @@ CONTAINS
       END DO
     END DO
 !$omp end do nowait
-    CALL acc_wait_if_requested(1, opt_acc_async)
 
   END SUBROUTINE copy_2d_spdp
 
   !> copy state, omp parallel, does not wait for other threads to complete
-  SUBROUTINE copy_3d_spdp(src, dest, lacc, opt_acc_async)
+  SUBROUTINE copy_3d_spdp(src, dest, lacc, opt_acc_async_queue)
     REAL(sp), INTENT(IN) :: src(:, :, :)
     REAL(dp), INTENT(OUT) :: dest(:, :, :)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, i2, i3, m1, m2, m3
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(dest, 1)
     m2 = SIZE(dest, 2)
     m3 = SIZE(dest, 3)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(3) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(3) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2,i3)
 #else
@@ -770,27 +780,28 @@ CONTAINS
       END DO
     END DO
 !$omp end do nowait
-    CALL acc_wait_if_requested(1, opt_acc_async)
 
   END SUBROUTINE copy_3d_spdp
 
   !> copy state, omp parallel, does not wait for other threads to complete
-  SUBROUTINE copy_4d_spdp(src, dest, lacc, opt_acc_async)
+  SUBROUTINE copy_4d_spdp(src, dest, lacc, opt_acc_async_queue)
     REAL(sp), INTENT(IN) :: src(:, :, :, :)
     REAL(dp), INTENT(OUT) :: dest(:, :, :, :)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, i2, i3, i4, m1, m2, m3, m4
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(dest, 1)
     m2 = SIZE(dest, 2)
     m3 = SIZE(dest, 3)
     m4 = SIZE(dest, 4)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(4) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(4) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2,i3,i4)
 #else
@@ -806,20 +817,21 @@ CONTAINS
       END DO
     END DO
 !$omp end do nowait
-    CALL acc_wait_if_requested(1, opt_acc_async)
 
   END SUBROUTINE copy_4d_spdp
 
   !> copy state, omp parallel, does not wait for other threads to complete
-  SUBROUTINE copy_5d_spdp(src, dest, lacc, opt_acc_async)
+  SUBROUTINE copy_5d_spdp(src, dest, lacc, opt_acc_async_queue)
     REAL(sp), INTENT(IN) :: src(:, :, :, :, :)
     REAL(dp), INTENT(OUT) :: dest(:, :, :, :, :)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, i2, i3, i4, i5, m1, m2, m3, m4, m5
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(dest, 1)
     m2 = SIZE(dest, 2)
@@ -827,7 +839,7 @@ CONTAINS
     m4 = SIZE(dest, 4)
     m5 = SIZE(dest, 5)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(5) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(5) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2,i3,i4,i5)
 #else
@@ -846,24 +858,25 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE copy_5d_spdp
 
   !> copy state, omp parallel, does not wait for other threads to complete
-  SUBROUTINE copy_2d_i4(src, dest, lacc, opt_acc_async)
+  SUBROUTINE copy_2d_i4(src, dest, lacc, opt_acc_async_queue)
     INTEGER(ik4), INTENT(IN) :: src(:, :)
     INTEGER(ik4), INTENT(OUT) :: dest(:, :)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, i2, m1, m2
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(dest, 1)
     m2 = SIZE(dest, 2)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(2) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(2) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2)
 #else
@@ -876,25 +889,26 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE copy_2d_i4
 
   !> copy state, omp parallel, does not wait for other threads to complete
-  SUBROUTINE copy_3d_i4(src, dest, lacc, opt_acc_async)
+  SUBROUTINE copy_3d_i4(src, dest, lacc, opt_acc_async_queue)
     INTEGER(ik4), INTENT(IN) :: src(:, :, :)
     INTEGER(ik4), INTENT(OUT) :: dest(:, :, :)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, i2, i3, m1, m2, m3
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(dest, 1)
     m2 = SIZE(dest, 2)
     m3 = SIZE(dest, 3)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(3) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(3) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2,i3)
 #else
@@ -909,19 +923,20 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE copy_3d_i4
 
   !> copy state, omp parallel, does not wait for other threads to complete
-  SUBROUTINE copy_5d_i4(src, dest, lacc, opt_acc_async)
+  SUBROUTINE copy_5d_i4(src, dest, lacc, opt_acc_async_queue)
     INTEGER(ik4), INTENT(IN) :: src(:, :, :, :, :)
     INTEGER(ik4), INTENT(OUT) :: dest(:, :, :, :, :)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, i2, i3, i4, i5, m1, m2, m3, m4, m5
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(dest, 1)
     m2 = SIZE(dest, 2)
@@ -929,7 +944,7 @@ CONTAINS
     m4 = SIZE(dest, 4)
     m5 = SIZE(dest, 5)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(5) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(5) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2,i3,i4,i5)
 #else
@@ -948,19 +963,20 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE copy_5d_i4
 
   !> copy state, omp parallel, does not wait for other threads to complete
-  SUBROUTINE copy_5d_l(src, dest, lacc, opt_acc_async)
+  SUBROUTINE copy_5d_l(src, dest, lacc, opt_acc_async_queue)
     LOGICAL, INTENT(IN) :: src(:, :, :, :, :)
     LOGICAL, INTENT(OUT) :: dest(:, :, :, :, :)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, i2, i3, i4, i5, m1, m2, m3, m4, m5
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(dest, 1)
     m2 = SIZE(dest, 2)
@@ -968,7 +984,7 @@ CONTAINS
     m4 = SIZE(dest, 4)
     m5 = SIZE(dest, 5)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(5) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(5) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
     !$omp do private(i1,i2,i3,i4,i5)
 #else
@@ -987,62 +1003,65 @@ CONTAINS
     END DO
     !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE copy_5d_l
 
-  SUBROUTINE init_zero_1d_dp(init_var, lacc, opt_acc_async)
+  SUBROUTINE init_zero_1d_dp(init_var, lacc, opt_acc_async_queue) ! zero init_var(:); issues no ACC wait itself
     REAL(dp), INTENT(OUT) :: init_var(:)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue !< OpenACC async queue id; 1 is used when absent
     INTEGER :: i1, m1
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue !< queue id actually placed in the ASYNC clause below
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue) ! optional argument, or default 1
 
     m1 = SIZE(init_var, 1)
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) IF(lzacc)
 !$omp do
     DO i1 = 1, m1
       init_var(i1) = 0.0_dp
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE init_zero_1d_dp
 
-  SUBROUTINE init_zero_1d_sp(init_var, lacc, opt_acc_async)
+  SUBROUTINE init_zero_1d_sp(init_var, lacc, opt_acc_async_queue) ! zero init_var(:); issues no ACC wait itself
     REAL(sp), INTENT(OUT) :: init_var(:)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue !< OpenACC async queue id; 1 is used when absent
     INTEGER :: i1, m1
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue !< queue id actually placed in the ASYNC clause below
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue) ! optional argument, or default 1
 
     m1 = SIZE(init_var, 1)
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) IF(lzacc)
     !$omp do
     DO i1 = 1, m1
-      init_var(i1) = 0.0_dp
+      init_var(i1) = 0.0_sp ! literal kind now matches REAL(sp) init_var (was 0.0_dp, converted implicitly)
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE init_zero_1d_sp
 
-  SUBROUTINE init_zero_2d_dp(init_var, lacc, opt_acc_async)
+  SUBROUTINE init_zero_2d_dp(init_var, lacc, opt_acc_async_queue)
     REAL(dp), INTENT(OUT) :: init_var(:, :)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, i2, m1, m2
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(init_var, 1)
     m2 = SIZE(init_var, 2)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(2) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(2) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2)
 #else
@@ -1055,22 +1074,23 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE init_zero_2d_dp
 
-  SUBROUTINE init_zero_2d_i4(init_var, lacc, opt_acc_async)
+  SUBROUTINE init_zero_2d_i4(init_var, lacc, opt_acc_async_queue)
     INTEGER(ik4), INTENT(OUT) :: init_var(:, :)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, i2, m1, m2
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(init_var, 1)
     m2 = SIZE(init_var, 2)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(2) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(2) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2)
 #else
@@ -1083,23 +1103,24 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE init_zero_2d_i4
 
-  SUBROUTINE init_zero_3d_dp(init_var, lacc, opt_acc_async)
+  SUBROUTINE init_zero_3d_dp(init_var, lacc, opt_acc_async_queue)
     REAL(dp), INTENT(OUT) :: init_var(:, :, :)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, i2, i3, m1, m2, m3
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(init_var, 1)
     m2 = SIZE(init_var, 2)
     m3 = SIZE(init_var, 3)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(3) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(3) IF(lzacc)
 #if (defined(__INTEL_COMPILER) || defined(_CRAYFTN))
 !$omp do private(i1,i2,i3)
 #else
@@ -1114,23 +1135,24 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE init_zero_3d_dp
 
-  SUBROUTINE init_zero_3d_sp(init_var, lacc, opt_acc_async)
+  SUBROUTINE init_zero_3d_sp(init_var, lacc, opt_acc_async_queue)
     REAL(sp), INTENT(OUT) :: init_var(:, :, :)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, i2, i3, m1, m2, m3
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(init_var, 1)
     m2 = SIZE(init_var, 2)
     m3 = SIZE(init_var, 3)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(3) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(3) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2,i3)
 #else
@@ -1145,24 +1167,24 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
-
   END SUBROUTINE init_zero_3d_sp
 
-  SUBROUTINE init_zero_3d_i4(init_var, lacc, opt_acc_async)
+  SUBROUTINE init_zero_3d_i4(init_var, lacc, opt_acc_async_queue)
     INTEGER(ik4), INTENT(OUT) :: init_var(:, :, :)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, i2, i3, m1, m2, m3
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(init_var, 1)
     m2 = SIZE(init_var, 2)
     m3 = SIZE(init_var, 3)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(3) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(3) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2,i3)
 #else
@@ -1177,24 +1199,25 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE init_zero_3d_i4
 
-  SUBROUTINE init_zero_4d_dp(init_var, lacc, opt_acc_async)
+  SUBROUTINE init_zero_4d_dp(init_var, lacc, opt_acc_async_queue)
     REAL(dp), INTENT(OUT) :: init_var(:, :, :, :)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, i2, i3, i4, m1, m2, m3, m4
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(init_var, 1)
     m2 = SIZE(init_var, 2)
     m3 = SIZE(init_var, 3)
     m4 = SIZE(init_var, 4)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(4) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(4) IF(lzacc)
 #if (defined(__INTEL_COMPILER) || defined(_CRAYFTN))
 !$omp do private(i1,i2,i3,i4)
 #else
@@ -1212,24 +1235,25 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE init_zero_4d_dp
 
-  SUBROUTINE init_zero_4d_sp(init_var, lacc, opt_acc_async)
+  SUBROUTINE init_zero_4d_sp(init_var, lacc, opt_acc_async_queue)
     REAL(sp), INTENT(OUT) :: init_var(:, :, :, :)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, i2, i3, i4, m1, m2, m3, m4
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(init_var, 1)
     m2 = SIZE(init_var, 2)
     m3 = SIZE(init_var, 3)
     m4 = SIZE(init_var, 4)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(4) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(4) IF(lzacc)
 #if (defined(__INTEL_COMPILER) || defined(_CRAYFTN))
 !$omp do private(i1,i2,i3,i4)
 #else
@@ -1247,24 +1271,25 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE init_zero_4d_sp
 
-  SUBROUTINE init_zero_4d_i4(init_var, lacc, opt_acc_async)
+  SUBROUTINE init_zero_4d_i4(init_var, lacc, opt_acc_async_queue)
     INTEGER(ik4), INTENT(OUT) :: init_var(:, :, :, :)
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, i2, i3, i4, m1, m2, m3, m4
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(init_var, 1)
     m2 = SIZE(init_var, 2)
     m3 = SIZE(init_var, 3)
     m4 = SIZE(init_var, 4)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(4) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(4) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2,i3,i4)
 #else
@@ -1282,21 +1307,22 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE init_zero_4d_i4
 
-  SUBROUTINE init_1d_dp(init_var, init_val, lacc, opt_acc_async)
+  SUBROUTINE init_1d_dp(init_var, init_val, lacc, opt_acc_async_queue)
     REAL(dp), INTENT(OUT) :: init_var(:)
     REAL(dp), INTENT(IN) :: init_val
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
     INTEGER :: i1, m1
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(init_var, 1)
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) IF(lzacc)
     !$omp do private(i1)
     DO i1 = 1, m1
       init_var(i1) = init_val
@@ -1304,24 +1330,25 @@ CONTAINS
     !$omp end do nowait
     !$ACC END PARALLEL LOOP
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE init_1d_dp
 
-  SUBROUTINE init_2d_dp(init_var, init_val, lacc, opt_acc_async)
+  SUBROUTINE init_2d_dp(init_var, init_val, lacc, opt_acc_async_queue)
     REAL(dp), INTENT(OUT) :: init_var(:, :)
     REAL(dp), INTENT(IN) :: init_val
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
 
     INTEGER :: i1, i2, m1, m2
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(init_var, 1)
     m2 = SIZE(init_var, 2)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(2) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(2) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2)
 #else
@@ -1334,25 +1361,26 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE init_2d_dp
 
-  SUBROUTINE init_3d_dp(init_var, init_val, lacc, opt_acc_async)
+  SUBROUTINE init_3d_dp(init_var, init_val, lacc, opt_acc_async_queue)
     REAL(dp), INTENT(OUT) :: init_var(:, :, :)
     REAL(dp), INTENT(IN) :: init_val
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
 
     INTEGER :: i1, i2, i3, m1, m2, m3
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(init_var, 1)
     m2 = SIZE(init_var, 2)
     m3 = SIZE(init_var, 3)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(3) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(3) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2,i3)
 #else
@@ -1367,25 +1395,26 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE init_3d_dp
 
-  SUBROUTINE init_3d_spdp(init_var, init_val, lacc, opt_acc_async)
+  SUBROUTINE init_3d_spdp(init_var, init_val, lacc, opt_acc_async_queue)
     REAL(sp), INTENT(OUT) :: init_var(:, :, :)
     REAL(dp), INTENT(IN) :: init_val
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
 
     INTEGER :: i1, i2, i3, m1, m2, m3
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(init_var, 1)
     m2 = SIZE(init_var, 2)
     m3 = SIZE(init_var, 3)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(3) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(3) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2,i3)
 #else
@@ -1400,19 +1429,20 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE init_3d_spdp
 
-  SUBROUTINE init_5d_dp(init_var, init_val, lacc, opt_acc_async)
+  SUBROUTINE init_5d_dp(init_var, init_val, lacc, opt_acc_async_queue)
     REAL(dp), INTENT(OUT) :: init_var(:, :, :, :, :)
     REAL(dp), INTENT(IN) :: init_val
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
 
     INTEGER :: i1, i2, i3, i4, i5, m1, m2, m3, m4, m5
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(init_var, 1)
     m2 = SIZE(init_var, 2)
@@ -1420,7 +1450,7 @@ CONTAINS
     m4 = SIZE(init_var, 4)
     m5 = SIZE(init_var, 5)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(5) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(5) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2,i3,i4,i5)
 #else
@@ -1439,19 +1469,20 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE init_5d_dp
 
-  SUBROUTINE init_5d_sp(init_var, init_val, lacc, opt_acc_async)
+  SUBROUTINE init_5d_sp(init_var, init_val, lacc, opt_acc_async_queue)
     REAL(sp), INTENT(OUT) :: init_var(:, :, :, :, :)
     REAL(sp), INTENT(IN) :: init_val
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
 
     INTEGER :: i1, i2, i3, i4, i5, m1, m2, m3, m4, m5
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(init_var, 1)
     m2 = SIZE(init_var, 2)
@@ -1459,7 +1490,7 @@ CONTAINS
     m4 = SIZE(init_var, 4)
     m5 = SIZE(init_var, 5)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(5) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(5) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2,i3,i4,i5)
 #else
@@ -1478,19 +1509,20 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE init_5d_sp
 
-  SUBROUTINE init_5d_i4(init_var, init_val, lacc, opt_acc_async)
+  SUBROUTINE init_5d_i4(init_var, init_val, lacc, opt_acc_async_queue)
     INTEGER(ik4), INTENT(OUT) :: init_var(:, :, :, :, :)
     INTEGER(ik4), INTENT(IN) :: init_val
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
 
     INTEGER :: i1, i2, i3, i4, i5, m1, m2, m3, m4, m5
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(init_var, 1)
     m2 = SIZE(init_var, 2)
@@ -1498,7 +1530,7 @@ CONTAINS
     m4 = SIZE(init_var, 4)
     m5 = SIZE(init_var, 5)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(5) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(5) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2,i3,i4,i5)
 #else
@@ -1517,19 +1549,20 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE init_5d_i4
 
-  SUBROUTINE init_5d_l(init_var, init_val, lacc, opt_acc_async)
+  SUBROUTINE init_5d_l(init_var, init_val, lacc, opt_acc_async_queue)
     LOGICAL, INTENT(OUT) :: init_var(:, :, :, :, :)
     LOGICAL, INTENT(IN)  :: init_val
     LOGICAL, INTENT(IN) :: lacc
-    LOGICAL, INTENT(IN), OPTIONAL :: opt_acc_async
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue
 
     INTEGER :: i1, i2, i3, i4, i5, m1, m2, m3, m4, m5
     LOGICAL :: lzacc
+    INTEGER :: acc_async_queue
 
     CALL set_acc_host_or_device(lzacc, lacc)
+    CALL set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
 
     m1 = SIZE(init_var, 1)
     m2 = SIZE(init_var, 2)
@@ -1537,7 +1570,7 @@ CONTAINS
     m4 = SIZE(init_var, 4)
     m5 = SIZE(init_var, 5)
 
-    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(1) COLLAPSE(5) IF(lzacc)
+    !$ACC PARALLEL LOOP DEFAULT(PRESENT) ASYNC(acc_async_queue) COLLAPSE(5) IF(lzacc)
 #if (defined(__INTEL_COMPILER))
 !$omp do private(i1,i2,i3,i4,i5)
 #else
@@ -1556,7 +1589,6 @@ CONTAINS
     END DO
 !$omp end do nowait
 
-    CALL acc_wait_if_requested(1, opt_acc_async)
   END SUBROUTINE init_5d_l
 
   SUBROUTINE var_scale_3d_dp(var, scale_val, lacc, opt_acc_async)
@@ -2363,4 +2395,14 @@ CONTAINS
 #endif
   END SUBROUTINE set_acc_host_or_device
 
+  !> Resolve the queue id used in ASYNC(...) clauses: the caller-supplied
+  !! opt_acc_async_queue when it is present, otherwise queue 1.
+  SUBROUTINE set_acc_async_queue(acc_async_queue, opt_acc_async_queue)
+    INTEGER, INTENT(OUT) :: acc_async_queue !< resolved queue id
+    INTEGER, INTENT(IN), OPTIONAL :: opt_acc_async_queue !< requested queue id
+
+    acc_async_queue = 1 ! fallback when no queue is requested
+    IF (PRESENT(opt_acc_async_queue)) acc_async_queue = opt_acc_async_queue
+  END SUBROUTINE set_acc_async_queue
+
 END MODULE mo_fortran_tools