diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 61ffb6397bbcaac85868d142ee22c480b06b7f21..58005948bb75b0012d47c9bbecb55bd9ee85b38f 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -5,6 +5,7 @@ include:
 stages:
   - lint
   - build_and_test
+  - build_and_test_OpenMP
   - build_OpenACC
   - coverage
   - prepare
@@ -94,6 +95,57 @@ nvhpc:
     - levante, hpc, dkrz
   needs: ["Check Typo", "Check OpenACC Style", "Check Style", "Check CMake Style", "Check License"]
 
+OpenMP_gcc11:
+  stage: build_and_test_OpenMP
+  extends: 
+    - .default
+  before_script:
+    - module list
+  script:
+    - module load gcc/11.2.0-gcc-11.2.0
+    - mkdir gcc112
+    - cd gcc112
+    - /sw/spack-levante/cmake-3.23.1-q5kzz6/bin/cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo -DFS_ENABLE_OMP=ON -DCMAKE_C_COMPILER=gcc -DCMAKE_Fortran_COMPILER=gfortran -DCMAKE_CXX_COMPILER=g++
+    - make VERBOSE=1
+    - ctest --output-on-failure
+  tags:
+    - levante, hpc, dkrz
+  needs: ["gcc11"]
+
+OpenMP_intel22:
+  stage: build_and_test_OpenMP
+  extends: 
+    - .default
+  before_script:
+    - module list
+  script:
+    - module load gcc/11.2.0-gcc-11.2.0 intel-oneapi-compilers/2022.0.1-gcc-11.2.0
+    - mkdir intel22
+    - cd intel22
+    - /sw/spack-levante/cmake-3.23.1-q5kzz6/bin/cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo -DFS_ENABLE_OMP=ON -DCMAKE_C_COMPILER=icc -DCMAKE_Fortran_COMPILER=ifort -DCMAKE_CXX_COMPILER=icpc -DCMAKE_BUILD_RPATH=/sw/spack-levante/gcc-11.2.0-bcn7mb/lib64
+    - make VERBOSE=1
+    - ctest --output-on-failure
+  tags:
+    - levante, hpc, dkrz
+  needs: ["intel22"]
+
+OpenMP_nvhpc:
+  stage: build_and_test_OpenMP
+  extends: 
+    - .default
+  before_script:
+    - module list
+  script:
+    - module load gcc/11.2.0-gcc-11.2.0 nvhpc/22.5-gcc-11.2.0
+    - mkdir nvhpc
+    - cd nvhpc
+    - /sw/spack-levante/cmake-3.23.1-q5kzz6/bin/cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo -DFS_ENABLE_OMP=ON -DCMAKE_C_COMPILER=nvc -DCMAKE_Fortran_COMPILER=nvfortran -DCMAKE_CXX_COMPILER=nvc++
+    - make VERBOSE=1
+    - ctest --output-on-failure
+  tags:
+    - levante, hpc, dkrz
+  needs: ["nvhpc"]
+
 OpenACC:
   stage: build_OpenACC
   extends: 
@@ -251,7 +303,7 @@ Prepare Changelog:
   artifacts:
     paths:
     - release_notes.md
-  needs: ["nag", "gcc11", "intel22", "nvhpc", "OpenACC"]
+  needs: ["nag", "OpenMP_gcc11", "OpenMP_intel22", "OpenMP_nvhpc", "OpenACC"]
 
 Release:
   stage: release
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index aecab81681f1b6c9af752b891ce0d32a570661c9..d907ce79a3b2389d69d9cdf0ca622b886a507ec1 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -21,11 +21,6 @@ test_big_endian(HAVE_BIG_ENDIAN)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.in
                ${CMAKE_CURRENT_BINARY_DIR}/config.h)
 
-if(FS_ENABLE_OMP)
-  find_package(OpenMP REQUIRED)
-  set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${OpenMP_Fortran_FLAGS}")
-endif()
-
 add_library(
   fortran-support
   ${CMAKE_CURRENT_BINARY_DIR}/config.h
@@ -90,6 +85,11 @@ if(FS_ENABLE_SINGLE_PRECISION)
   target_compile_definitions(fortran-support PRIVATE __SINGLE_PRECISION)
 endif()
 
+if(FS_ENABLE_OMP)
+  find_package(OpenMP COMPONENTS Fortran REQUIRED)
+  target_link_libraries(fortran-support PUBLIC OpenMP::OpenMP_Fortran)
+endif()
+
 include("${PROJECT_SOURCE_DIR}/cmake/check_macro.cmake")
 check_macro_defined(HAS_OPENACC_MACRO _OPENACC QUIET)
 if(FS_ENABLE_OPENACC)
diff --git a/src/mo_fortran_tools.F90 b/src/mo_fortran_tools.F90
index 4adddc68ed48ab81501e4947129fa022ebfdde95..08c51387cfb18440dbca52741adf126587f4aa46 100644
--- a/src/mo_fortran_tools.F90
+++ b/src/mo_fortran_tools.F90
@@ -2224,26 +2224,12 @@ CONTAINS
         CALL util_stride_1d(in_stride(2), elem_byte_size, &
                             C_LOC(ptr_in(1, 1)), C_LOC(ptr_in(1, 2)))
         base_shape(1) = in_stride(2)
+      ELSE
+        base_shape(1) = in_shape(1)
       END IF
       base_shape(2) = in_shape(2)
       CALL insert_dimension_r_dp_3_2_s(ptr_out, ptr_in(1, 1), &
                                        base_shape, new_dim_rank)
-      IF (in_stride(1) > 1 .OR. in_stride(2) > in_shape(1) &
-          .OR. base_shape(1) /= in_shape(1)) THEN
-        out_stride(1) = in_stride(1)
-        out_stride(2) = 1
-        out_shape(1:out_rank - 1) = in_shape
-        DO i = out_rank, new_dim_rank + 1, -1
-          out_shape(i) = out_shape(i - 1)
-          out_stride(i) = out_stride(i - 1)
-        END DO
-        out_stride(new_dim_rank) = 1
-        out_shape(new_dim_rank) = 1
-        out_shape = (out_shape - 1)*out_stride + 1
-        ptr_out => ptr_out(:out_shape(1):out_stride(1), &
-             &             :out_shape(2):out_stride(2), &
-             &             :out_shape(3):out_stride(3))
-      END IF
     ELSE
       out_shape(1:out_rank - 1) = SHAPE(ptr_in)
       DO i = out_rank, new_dim_rank + 1, -1
@@ -2299,26 +2285,12 @@ CONTAINS
         CALL util_stride_1d(in_stride(2), elem_byte_size, &
                             C_LOC(ptr_in(1, 1)), C_LOC(ptr_in(1, 2)))
         base_shape(1) = in_stride(2)
+      ELSE
+        base_shape(1) = in_shape(1)
       END IF
       base_shape(2) = in_shape(2)
       CALL insert_dimension_r_sp_3_2_s(ptr_out, ptr_in(1, 1), &
                                        base_shape, new_dim_rank)
-      IF (in_stride(1) > 1 .OR. in_stride(2) > in_shape(1) &
-          .OR. base_shape(1) /= in_shape(1)) THEN
-        out_stride(1) = in_stride(1)
-        out_stride(2) = 1
-        out_shape(1:out_rank - 1) = in_shape
-        DO i = out_rank, new_dim_rank + 1, -1
-          out_shape(i) = out_shape(i - 1)
-          out_stride(i) = out_stride(i - 1)
-        END DO
-        out_stride(new_dim_rank) = 1
-        out_shape(new_dim_rank) = 1
-        out_shape = (out_shape - 1)*out_stride + 1
-        ptr_out => ptr_out(:out_shape(1):out_stride(1), &
-             &             :out_shape(2):out_stride(2), &
-             &             :out_shape(3):out_stride(3))
-      END IF
     ELSE
       out_shape(1:out_rank - 1) = SHAPE(ptr_in)
       DO i = out_rank, new_dim_rank + 1, -1
@@ -2374,26 +2346,12 @@ CONTAINS
         CALL util_stride_1d(in_stride(2), elem_byte_size, &
                             C_LOC(ptr_in(1, 1)), C_LOC(ptr_in(1, 2)))
         base_shape(1) = in_stride(2)
+      ELSE
+        base_shape(1) = in_shape(1)
       END IF
       base_shape(2) = in_shape(2)
       CALL insert_dimension_i4_3_2_s(ptr_out, ptr_in(1, 1), &
                                      base_shape, new_dim_rank)
-      IF (in_stride(1) > 1 .OR. in_stride(2) > in_shape(1) &
-          .OR. base_shape(1) /= in_shape(1)) THEN
-        out_stride(1) = in_stride(1)
-        out_stride(2) = 1
-        out_shape(1:out_rank - 1) = in_shape
-        DO i = out_rank, new_dim_rank + 1, -1
-          out_shape(i) = out_shape(i - 1)
-          out_stride(i) = out_stride(i - 1)
-        END DO
-        out_stride(new_dim_rank) = 1
-        out_shape(new_dim_rank) = 1
-        out_shape = (out_shape - 1)*out_stride + 1
-        ptr_out => ptr_out(:out_shape(1):out_stride(1), &
-             &             :out_shape(2):out_stride(2), &
-             &             :out_shape(3):out_stride(3))
-      END IF
     ELSE
       out_shape(1:out_rank - 1) = SHAPE(ptr_in)
       DO i = out_rank, new_dim_rank + 1, -1
@@ -2447,26 +2405,12 @@ CONTAINS
         CALL util_stride_1d(in_stride(2), elem_byte_size, &
                             C_LOC(ptr_in(1, 1)), C_LOC(ptr_in(1, 2)))
         base_shape(1) = in_stride(2)
+      ELSE
+        base_shape(1) = in_shape(1)
       END IF
       base_shape(2) = in_shape(2)
       CALL insert_dimension_l_3_2_s(ptr_out, ptr_in(1, 1), &
                                     base_shape, new_dim_rank)
-      IF (in_stride(1) > 1 .OR. in_stride(2) > in_shape(1) &
-          .OR. base_shape(1) /= in_shape(1)) THEN
-        out_stride(1) = in_stride(1)
-        out_stride(2) = 1
-        out_shape(1:out_rank - 1) = in_shape
-        DO i = out_rank, new_dim_rank + 1, -1
-          out_shape(i) = out_shape(i - 1)
-          out_stride(i) = out_stride(i - 1)
-        END DO
-        out_stride(new_dim_rank) = 1
-        out_shape(new_dim_rank) = 1
-        out_shape = (out_shape - 1)*out_stride + 1
-        ptr_out => ptr_out(:out_shape(1):out_stride(1), &
-             &             :out_shape(2):out_stride(2), &
-             &             :out_shape(3):out_stride(3))
-      END IF
     ELSE
       out_shape(1:out_rank - 1) = SHAPE(ptr_in)
       DO i = out_rank, new_dim_rank + 1, -1