Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • icon-libraries/libiconmath
1 result
Show changes
Commits on Source (24)
// ICON
//
// ---------------------------------------------------------------
// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
// Contact information: icon-model.org
//
// See AUTHORS.TXT for a list of authors
// See LICENSES/ for license information
// SPDX-License-Identifier: BSD-3-Clause
// ---------------------------------------------------------------
#include <iostream>
#include <vector>
#include <horizontal/lib_divrot.hpp>
#include <support/mo_lib_loopindices.hpp>
template <typename T>
void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx,
const int *cell_neighbor_blk, const T *lsq_qtmat_c,
const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c,
const T *lsq_moments, T *p_coeff, int i_startblk,
int i_endblk, int i_startidx_in, int i_endidx_in,
int slev, int elev, int nproma, bool l_consv, bool lacc,
bool acc_async, int nblks_c, int nlev, int lsq_dim_unk,
int lsq_dim_c) {
// Wrap raw pointers in unmanaged Kokkos Views.
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedConstT3D;
typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
Kokkos::MemoryUnmanaged>
UnmanagedConstT4D;
typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedT4D;
typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
Kokkos::MemoryUnmanaged>
UnmanagedConstInt3D;
Kokkos::View<T *> z_d("z_d", lsq_dim_c);
Kokkos::View<T *> z_qt_times_d("z_qt_times_d", lsq_dim_unk);
UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, lsq_dim_c);
UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, lsq_dim_c);
UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c);
UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c);
UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk,
lsq_dim_c, nblks_c);
UnmanagedConstT3D lsq_rmat_rdiag_c_view(lsq_rmat_rdiag_c, nproma, lsq_dim_unk,
nblks_c);
UnmanagedConstT3D lsq_rmat_utri_c_view(
lsq_rmat_utri_c, nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2,
nblks_c);
UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk);
for (int jb = i_startblk; jb < i_endblk; ++jb) {
int i_startidx, i_endidx;
get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
i_endblk, i_startidx, i_endidx);
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
{elev, i_endidx});
Kokkos::parallel_for(
"recon_lsq_cell_l_inner", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
z_d(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) -
p_cc_view(jc, jk, jb);
z_d(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) -
p_cc_view(jc, jk, jb);
z_d(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) -
p_cc_view(jc, jk, jb);
// matrix multiplication Q^T d (partitioned into 2 dot products)
z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0) +
lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1) +
lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2);
z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0) +
lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1) +
lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2);
p_coeff_view(2, jc, jk, jb) =
lsq_rmat_rdiag_c_view(jc, 1, jb) * z_qt_times_d(1);
p_coeff_view(1, jc, jk, jb) =
lsq_rmat_rdiag_c_view(jc, 0, jb) *
(z_qt_times_d(0) -
lsq_rmat_utri_c_view(jc, 0, jb) * p_coeff_view(2, jc, jk, jb));
p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb);
});
if (l_consv) {
Kokkos::parallel_for(
"recon_lsq_cell_l_consv", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
p_coeff_view(0, jc, jk, jb) =
p_coeff_view(0, jc, jk, jb) -
p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) -
p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1);
});
}
}
if (!acc_async)
Kokkos::fence();
}
ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_L);
template <typename T>
void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx,
const int *cell_neighbor_blk, const T *lsq_pseudoinv,
const T *lsq_moments, T *p_coeff, int i_startblk,
int i_endblk, int i_startidx_in, int i_endidx_in,
int slev, int elev, int nproma, bool l_consv,
bool lacc, bool acc_async, int nblks_c, int nlev,
int lsq_dim_unk, int lsq_dim_c) {
// Wrap raw pointers in unmanaged Kokkos Views.
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedConstT3D;
typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
Kokkos::MemoryUnmanaged>
UnmanagedConstT4D;
typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedT4D;
typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
Kokkos::MemoryUnmanaged>
UnmanagedConstInt3D;
Kokkos::View<T *> z_b("z_b", lsq_dim_c);
UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, lsq_dim_c);
UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, lsq_dim_c);
UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c);
UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c);
UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk,
lsq_dim_c, nblks_c);
UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk);
for (int jb = i_startblk; jb < i_endblk; ++jb) {
int i_startidx, i_endidx;
get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
i_endblk, i_startidx, i_endidx);
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
{elev, i_endidx});
Kokkos::parallel_for(
"recon_lsq_cell_l_svd_inner", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
z_b(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) -
p_cc_view(jc, jk, jb);
z_b(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) -
p_cc_view(jc, jk, jb);
z_b(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) -
p_cc_view(jc, jk, jb);
p_coeff_view(2, jc, jk, jb) =
lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0) +
lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1) +
lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2);
p_coeff_view(1, jc, jk, jb) =
lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0) +
lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1) +
lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2);
p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb);
});
if (l_consv) {
Kokkos::parallel_for(
"recon_lsq_cell_l_svd_consv", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
p_coeff_view(0, jc, jk, jb) =
p_coeff_view(0, jc, jk, jb) -
p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) -
p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1);
});
}
}
if (!acc_async)
Kokkos::fence();
}
ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD);
template <typename T>
void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c,
const T *lsq_qtmat_c, const T *lsq_rmat_rdiag_c,
const T *lsq_rmat_utri_c, const T *lsq_moments,
T *p_coeff, int i_startblk, int i_endblk,
int i_startidx_in, int i_endidx_in, int slev, int elev,
int nproma, int patch_id, bool l_limited_area, bool lacc,
int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) {
// Wrap raw pointers in unmanaged Kokkos Views.
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedConstT3D;
typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
Kokkos::MemoryUnmanaged>
UnmanagedConstT4D;
typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedT4D;
typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
Kokkos::MemoryUnmanaged>
UnmanagedConstInt3D;
Kokkos::View<T ***> z_d("z_d", lsq_dim_c, nproma, nlev);
Kokkos::View<T *> z_qt_times_d("z_qt_times_d", lsq_dim_unk);
UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c);
UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c);
UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c);
UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c);
UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk,
lsq_dim_c, nblks_c);
UnmanagedConstT3D ptr_rrdiag(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, nblks_c);
UnmanagedConstT3D ptr_rutri(lsq_rmat_utri_c, nproma,
(lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2,
nblks_c);
UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk);
if (patch_id > 0 || l_limited_area) {
Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy(
{0, i_startidx_in, slev, i_startblk},
{lsq_dim_unk + 1, i_endidx_in, elev, i_endblk});
Kokkos::parallel_for(
"recon_lsq_cell_q_init", initPolicy,
KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) {
p_coeff_view(ji, jc, jk, jb) = 0;
});
}
for (int jb = i_startblk; jb < i_endblk; ++jb) {
int i_startidx, i_endidx;
get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
i_endblk, i_startidx, i_endidx);
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
{elev, i_endidx});
Kokkos::parallel_for(
"recon_lsq_cell_q_step1", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
z_d(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) -
p_cc_view(jc, jk, jb);
z_d(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) -
p_cc_view(jc, jk, jb);
z_d(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) -
p_cc_view(jc, jk, jb);
z_d(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) -
p_cc_view(jc, jk, jb);
z_d(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) -
p_cc_view(jc, jk, jb);
z_d(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) -
p_cc_view(jc, jk, jb);
z_d(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) -
p_cc_view(jc, jk, jb);
z_d(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) -
p_cc_view(jc, jk, jb);
z_d(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) -
p_cc_view(jc, jk, jb);
});
Kokkos::parallel_for(
"recon_lsq_cell_q_step2", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk);
p_coeff_view(5, jc, jk, jb) = ptr_rrdiag(jc, 4, jb) * z_qt_times_d(4);
p_coeff_view(4, jc, jk, jb) =
ptr_rrdiag(jc, 3, jb) *
(z_qt_times_d(3) -
ptr_rutri(jc, 0, jb) * p_coeff_view(5, jc, jk, jb));
p_coeff_view(3, jc, jk, jb) =
ptr_rrdiag(jc, 2, jb) *
(z_qt_times_d(2) -
ptr_rutri(jc, 1, jb) * p_coeff_view(4, jc, jk, jb) -
ptr_rutri(jc, 2, jb) * p_coeff_view(5, jc, jk, jb));
p_coeff_view(2, jc, jk, jb) =
ptr_rrdiag(jc, 1, jb) *
(z_qt_times_d(1) -
ptr_rutri(jc, 3, jb) * p_coeff_view(3, jc, jk, jb) -
ptr_rutri(jc, 4, jb) * p_coeff_view(4, jc, jk, jb) -
ptr_rutri(jc, 5, jb) * p_coeff_view(5, jc, jk, jb));
p_coeff_view(1, jc, jk, jb) =
ptr_rrdiag(jc, 0, jb) *
(z_qt_times_d(0) -
ptr_rutri(jc, 6, jb) * p_coeff_view(2, jc, jk, jb) -
ptr_rutri(jc, 7, jb) * p_coeff_view(3, jc, jk, jb) -
ptr_rutri(jc, 8, jb) * p_coeff_view(4, jc, jk, jb) -
ptr_rutri(jc, 9, jb) * p_coeff_view(5, jc, jk, jb));
p_coeff_view(0, jc, jk, jb) =
p_cc_view(jc, jk, jb) -
p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) -
p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) -
p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) -
p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) -
p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4);
});
}
Kokkos::fence();
}
ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_Q);
template <typename T>
void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c,
const int *lsq_blk_c, const T *lsq_pseudoinv,
const T *lsq_moments, T *p_coeff, int i_startblk,
int i_endblk, int i_startidx_in, int i_endidx_in,
int slev, int elev, int nproma, int patch_id,
bool l_limited_area, bool lacc, int nblks_c, int nlev,
int lsq_dim_unk, int lsq_dim_c) {
// Wrap raw pointers in unmanaged Kokkos Views.
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedConstT3D;
typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
Kokkos::MemoryUnmanaged>
UnmanagedConstT4D;
typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedT4D;
typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
Kokkos::MemoryUnmanaged>
UnmanagedConstInt3D;
Kokkos::View<T ***> z_b("z_b", lsq_dim_c, nproma, elev);
UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c);
UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c);
UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c);
UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c);
UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk,
lsq_dim_c, nblks_c);
UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk);
if (patch_id > 0 || l_limited_area) {
Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy(
{0, i_startidx_in, slev, i_startblk},
{lsq_dim_unk + 1, i_endidx_in, elev, i_endblk});
Kokkos::parallel_for(
"recon_lsq_cell_q_svd_init", initPolicy,
KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) {
p_coeff_view(ji, jc, jk, jb) = 0;
});
}
for (int jb = i_startblk; jb < i_endblk; ++jb) {
int i_startidx, i_endidx;
get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
i_endblk, i_startidx, i_endidx);
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
{elev, i_endidx});
Kokkos::parallel_for(
"recon_lsq_cell_q_svd_step1", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
z_b(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) -
p_cc_view(jc, jk, jb);
z_b(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) -
p_cc_view(jc, jk, jb);
z_b(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) -
p_cc_view(jc, jk, jb);
z_b(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) -
p_cc_view(jc, jk, jb);
z_b(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) -
p_cc_view(jc, jk, jb);
z_b(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) -
p_cc_view(jc, jk, jb);
z_b(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) -
p_cc_view(jc, jk, jb);
z_b(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) -
p_cc_view(jc, jk, jb);
z_b(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) -
p_cc_view(jc, jk, jb);
});
Kokkos::parallel_for(
"recon_lsq_cell_q_svd_step2", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
p_coeff_view(5, jc, jk, jb) =
lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0, jc, jk) +
lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1, jc, jk) +
lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2, jc, jk) +
lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3, jc, jk) +
lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4, jc, jk) +
lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5, jc, jk) +
lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6, jc, jk) +
lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7, jc, jk) +
lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8, jc, jk);
p_coeff_view(4, jc, jk, jb) =
lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0, jc, jk) +
lsq_pseudoinv_view(jc, 3, 1, jb) * z_b(1, jc, jk) +
lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2, jc, jk) +
lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3, jc, jk) +
lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4, jc, jk) +
lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5, jc, jk) +
lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6, jc, jk) +
lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7, jc, jk) +
lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8, jc, jk);
p_coeff_view(3, jc, jk, jb) =
lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0, jc, jk) +
lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1, jc, jk) +
lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2, jc, jk) +
lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3, jc, jk) +
lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4, jc, jk) +
lsq_pseudoinv_view(jc, 2, 5, jb) * z_b(5, jc, jk) +
lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6, jc, jk) +
lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7, jc, jk) +
lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8, jc, jk);
p_coeff_view(2, jc, jk, jb) =
lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0, jc, jk) +
lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1, jc, jk) +
lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2, jc, jk) +
lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3, jc, jk) +
lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4, jc, jk) +
lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5, jc, jk) +
lsq_pseudoinv_view(jc, 1, 6, jb) * z_b(6, jc, jk) +
lsq_pseudoinv_view(jc, 1, 7, jb) * z_b(7, jc, jk) +
lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8, jc, jk);
p_coeff_view(1, jc, jk, jb) =
lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0, jc, jk) +
lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1, jc, jk) +
lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2, jc, jk) +
lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3, jc, jk) +
lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4, jc, jk) +
lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5, jc, jk) +
lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6, jc, jk) +
lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7, jc, jk) +
lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8, jc, jk);
p_coeff_view(0, jc, jk, jb) =
p_cc_view(jc, jk, jb) -
p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) -
p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) -
p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) -
p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) -
p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4);
});
}
Kokkos::fence();
}
ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD);
template <typename T>
void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c,
const T *lsq_qtmat_c, const T *lsq_rmat_rdiag_c,
const T *lsq_rmat_utri_c, const T *lsq_moments,
T *p_coeff, int i_startblk, int i_endblk,
int i_startidx_in, int i_endidx_in, int slev, int elev,
int nproma, int patch_id, bool l_limited_area, bool lacc,
int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) {
// Wrap raw pointers in unmanaged Kokkos Views.
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedConstT3D;
typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
Kokkos::MemoryUnmanaged>
UnmanagedConstT4D;
typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedT4D;
typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
Kokkos::MemoryUnmanaged>
UnmanagedConstInt3D;
Kokkos::View<T ***> z_d("z_d", lsq_dim_c, nproma, elev);
Kokkos::View<T *> z_qt_times_d("z_qt_times_d", 9);
UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c);
UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c);
UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c);
UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c);
UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk,
lsq_dim_c, nblks_c);
UnmanagedConstT3D ptr_rrdiag(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, nblks_c);
UnmanagedConstT3D ptr_rutri(lsq_rmat_utri_c, nproma,
(lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2,
nblks_c);
UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk);
if (patch_id > 0 || l_limited_area) {
Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy(
{0, i_startidx_in, slev, i_startblk},
{lsq_dim_unk + 1, i_endidx_in, elev, i_endblk});
Kokkos::parallel_for(
"recon_lsq_cell_c_init", initPolicy,
KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) {
p_coeff_view(ji, jc, jk, jb) = 0;
});
}
for (int jb = i_startblk; jb < i_endblk; ++jb) {
int i_startidx, i_endidx;
get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
i_endblk, i_startidx, i_endidx);
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
{elev, i_endidx});
Kokkos::parallel_for(
"recon_lsq_cell_c_step1", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
z_d(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) -
p_cc_view(jc, jk, jb);
z_d(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) -
p_cc_view(jc, jk, jb);
z_d(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) -
p_cc_view(jc, jk, jb);
z_d(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) -
p_cc_view(jc, jk, jb);
z_d(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) -
p_cc_view(jc, jk, jb);
z_d(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) -
p_cc_view(jc, jk, jb);
z_d(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) -
p_cc_view(jc, jk, jb);
z_d(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) -
p_cc_view(jc, jk, jb);
z_d(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) -
p_cc_view(jc, jk, jb);
});
Kokkos::parallel_for(
"recon_lsq_cell_c_step2", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(5) = lsq_qtmat_c_view(jc, 5, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 5, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 5, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 5, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 5, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 5, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 5, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 5, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 5, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(6) = lsq_qtmat_c_view(jc, 6, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 6, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 6, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 6, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 6, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 6, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 6, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 6, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 6, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(7) = lsq_qtmat_c_view(jc, 7, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 7, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 7, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 7, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 7, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 7, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 7, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 7, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 7, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(8) = lsq_qtmat_c_view(jc, 8, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 8, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 8, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 8, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 8, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 8, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 8, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 8, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 8, 8, jb) * z_d(8, jc, jk);
p_coeff_view(9, jc, jk, jb) = ptr_rrdiag(jc, 8, jb) * z_qt_times_d(8);
p_coeff_view(8, jc, jk, jb) =
ptr_rrdiag(jc, 7, jb) *
(z_qt_times_d(7) -
ptr_rutri(jc, 0, jb) * p_coeff_view(9, jc, jk, jb));
p_coeff_view(7, jc, jk, jb) =
ptr_rrdiag(jc, 6, jb) *
(z_qt_times_d(6) -
(ptr_rutri(jc, 1, jb) * p_coeff_view(8, jc, jk, jb) +
ptr_rutri(jc, 2, jb) * p_coeff_view(9, jc, jk, jb)));
p_coeff_view(6, jc, jk, jb) =
ptr_rrdiag(jc, 5, jb) *
(z_qt_times_d(5) -
(ptr_rutri(jc, 3, jb) * p_coeff_view(7, jc, jk, jb) +
ptr_rutri(jc, 4, jb) * p_coeff_view(8, jc, jk, jb) +
ptr_rutri(jc, 5, jb) * p_coeff_view(9, jc, jk, jb)));
p_coeff_view(5, jc, jk, jb) =
ptr_rrdiag(jc, 4, jb) *
(z_qt_times_d(4) -
(ptr_rutri(jc, 6, jb) * p_coeff_view(6, jc, jk, jb) +
ptr_rutri(jc, 7, jb) * p_coeff_view(7, jc, jk, jb) +
ptr_rutri(jc, 8, jb) * p_coeff_view(8, jc, jk, jb) +
ptr_rutri(jc, 9, jb) * p_coeff_view(9, jc, jk, jb)));
p_coeff_view(4, jc, jk, jb) =
ptr_rrdiag(jc, 3, jb) *
(z_qt_times_d(3) -
(ptr_rutri(jc, 10, jb) * p_coeff_view(5, jc, jk, jb) +
ptr_rutri(jc, 11, jb) * p_coeff_view(6, jc, jk, jb) +
ptr_rutri(jc, 12, jb) * p_coeff_view(7, jc, jk, jb) +
ptr_rutri(jc, 13, jb) * p_coeff_view(8, jc, jk, jb) +
ptr_rutri(jc, 14, jb) * p_coeff_view(9, jc, jk, jb)));
p_coeff_view(3, jc, jk, jb) =
ptr_rrdiag(jc, 2, jb) *
(z_qt_times_d(2) -
(ptr_rutri(jc, 15, jb) * p_coeff_view(4, jc, jk, jb) +
ptr_rutri(jc, 16, jb) * p_coeff_view(5, jc, jk, jb) +
ptr_rutri(jc, 17, jb) * p_coeff_view(6, jc, jk, jb) +
ptr_rutri(jc, 18, jb) * p_coeff_view(7, jc, jk, jb) +
ptr_rutri(jc, 19, jb) * p_coeff_view(8, jc, jk, jb) +
ptr_rutri(jc, 20, jb) * p_coeff_view(9, jc, jk, jb)));
p_coeff_view(2, jc, jk, jb) =
ptr_rrdiag(jc, 1, jb) *
(z_qt_times_d(1) -
(ptr_rutri(jc, 21, jb) * p_coeff_view(3, jc, jk, jb) +
ptr_rutri(jc, 22, jb) * p_coeff_view(4, jc, jk, jb) +
ptr_rutri(jc, 23, jb) * p_coeff_view(5, jc, jk, jb) +
ptr_rutri(jc, 24, jb) * p_coeff_view(6, jc, jk, jb) +
ptr_rutri(jc, 25, jb) * p_coeff_view(7, jc, jk, jb) +
ptr_rutri(jc, 26, jb) * p_coeff_view(8, jc, jk, jb) +
ptr_rutri(jc, 27, jb) * p_coeff_view(9, jc, jk, jb)));
p_coeff_view(1, jc, jk, jb) =
ptr_rrdiag(jc, 0, jb) *
(z_qt_times_d(0) -
(ptr_rutri(jc, 28, jb) * p_coeff_view(2, jc, jk, jb) +
ptr_rutri(jc, 29, jb) * p_coeff_view(3, jc, jk, jb) +
ptr_rutri(jc, 30, jb) * p_coeff_view(4, jc, jk, jb) +
ptr_rutri(jc, 31, jb) * p_coeff_view(5, jc, jk, jb) +
ptr_rutri(jc, 32, jb) * p_coeff_view(6, jc, jk, jb) +
ptr_rutri(jc, 33, jb) * p_coeff_view(7, jc, jk, jb) +
ptr_rutri(jc, 34, jb) * p_coeff_view(8, jc, jk, jb) +
ptr_rutri(jc, 35, jb) * p_coeff_view(9, jc, jk, jb)));
p_coeff_view(0, jc, jk, jb) =
p_cc_view(jc, jk, jb) -
(p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) +
p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) +
p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) +
p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) +
p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4) +
p_coeff_view(6, jc, jk, jb) * lsq_moments_view(jc, jb, 5) +
p_coeff_view(7, jc, jk, jb) * lsq_moments_view(jc, jb, 6) +
p_coeff_view(8, jc, jk, jb) * lsq_moments_view(jc, jb, 7) +
p_coeff_view(9, jc, jk, jb) * lsq_moments_view(jc, jb, 8));
});
}
Kokkos::fence();
}
ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_C);
template <typename T>
void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c,
const int *lsq_blk_c, const T *lsq_pseudoinv,
const T *lsq_moments, T *p_coeff, int i_startblk,
int i_endblk, int i_startidx_in, int i_endidx_in,
int slev, int elev, int nproma, int patch_id,
bool l_limited_area,
bool lacc, int nblks_c, int nlev, int lsq_dim_unk,
int lsq_dim_c) {
// Wrap raw pointers in unmanaged Kokkos Views.
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedConstT3D;
typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
Kokkos::MemoryUnmanaged>
UnmanagedConstT4D;
typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedT4D;
typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
Kokkos::MemoryUnmanaged>
UnmanagedConstInt3D;
Kokkos::View<T *> z_b("z_b", 9);
UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c);
UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c);
UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c);
UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c);
UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk,
lsq_dim_c, nblks_c);
UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk);
if (patch_id > 0 || l_limited_area) {
for (int jb = i_startblk; jb < i_endblk; ++jb) {
int i_startidx, i_endidx;
get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
i_endblk, i_startidx, i_endidx);
Kokkos::MDRangePolicy<Kokkos::Rank<3>> initPolicy(
{slev, i_startidx, 0}, {elev, i_endidx, lsq_dim_unk + 1});
Kokkos::parallel_for(
"recon_lsq_cell_c_svd_init", initPolicy,
KOKKOS_LAMBDA(const int jk, const int jc, const int ji) {
p_coeff_view(ji, jc, jk, jb) = 0;
});
}
}
for (int jb = i_startblk; jb < i_endblk; ++jb) {
int i_startidx, i_endidx;
get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
i_endblk, i_startidx, i_endidx);
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
{elev, i_endidx});
Kokkos::parallel_for(
"recon_lsq_cell_c_svd_inner", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
z_b(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) -
p_cc_view(jc, jk, jb);
z_b(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) -
p_cc_view(jc, jk, jb);
z_b(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) -
p_cc_view(jc, jk, jb);
z_b(3) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) -
p_cc_view(jc, jk, jb);
z_b(4) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) -
p_cc_view(jc, jk, jb);
z_b(5) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) -
p_cc_view(jc, jk, jb);
z_b(6) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) -
p_cc_view(jc, jk, jb);
z_b(7) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) -
p_cc_view(jc, jk, jb);
z_b(8) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) -
p_cc_view(jc, jk, jb);
p_coeff_view(9, jc, jk, jb) =
lsq_pseudoinv_view(jc, 8, 0, jb) * z_b(0) +
lsq_pseudoinv_view(jc, 8, 1, jb) * z_b(1) +
lsq_pseudoinv_view(jc, 8, 2, jb) * z_b(2) +
lsq_pseudoinv_view(jc, 8, 3, jb) * z_b(3) +
lsq_pseudoinv_view(jc, 8, 4, jb) * z_b(4) +
lsq_pseudoinv_view(jc, 8, 5, jb) * z_b(5) +
lsq_pseudoinv_view(jc, 8, 6, jb) * z_b(6) +
lsq_pseudoinv_view(jc, 8, 7, jb) * z_b(7) +
lsq_pseudoinv_view(jc, 8, 8, jb) * z_b(8);
p_coeff_view(8, jc, jk, jb) =
lsq_pseudoinv_view(jc, 7, 0, jb) * z_b(0) +
lsq_pseudoinv_view(jc, 7, 1, jb) * z_b(1) +
lsq_pseudoinv_view(jc, 7, 2, jb) * z_b(2) +
lsq_pseudoinv_view(jc, 7, 3, jb) * z_b(3) +
lsq_pseudoinv_view(jc, 7, 4, jb) * z_b(4) +
lsq_pseudoinv_view(jc, 7, 5, jb) * z_b(5) +
lsq_pseudoinv_view(jc, 7, 6, jb) * z_b(6) +
lsq_pseudoinv_view(jc, 7, 7, jb) * z_b(7) +
lsq_pseudoinv_view(jc, 7, 8, jb) * z_b(8);
p_coeff_view(7, jc, jk, jb) =
lsq_pseudoinv_view(jc, 6, 0, jb) * z_b(0) +
lsq_pseudoinv_view(jc, 6, 1, jb) * z_b(1) +
lsq_pseudoinv_view(jc, 6, 2, jb) * z_b(2) +
lsq_pseudoinv_view(jc, 6, 3, jb) * z_b(3) +
lsq_pseudoinv_view(jc, 6, 4, jb) * z_b(4) +
lsq_pseudoinv_view(jc, 6, 5, jb) * z_b(5) +
lsq_pseudoinv_view(jc, 6, 6, jb) * z_b(6) +
lsq_pseudoinv_view(jc, 6, 7, jb) * z_b(7) +
lsq_pseudoinv_view(jc, 6, 8, jb) * z_b(8);
p_coeff_view(6, jc, jk, jb) =
lsq_pseudoinv_view(jc, 5, 0, jb) * z_b(0) +
lsq_pseudoinv_view(jc, 5, 1, jb) * z_b(1) +
lsq_pseudoinv_view(jc, 5, 2, jb) * z_b(2) +
lsq_pseudoinv_view(jc, 5, 3, jb) * z_b(3) +
lsq_pseudoinv_view(jc, 5, 4, jb) * z_b(4) +
lsq_pseudoinv_view(jc, 5, 5, jb) * z_b(5) +
lsq_pseudoinv_view(jc, 5, 6, jb) * z_b(6) +
lsq_pseudoinv_view(jc, 5, 7, jb) * z_b(7) +
lsq_pseudoinv_view(jc, 5, 8, jb) * z_b(8);
p_coeff_view(5, jc, jk, jb) =
lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0) +
lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1) +
lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2) +
lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3) +
lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4) +
lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5) +
lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6) +
lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7) +
lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8);
p_coeff_view(4, jc, jk, jb) =
lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0) +
lsq_pseudoinv_view(jc, 3, 1, jb) * z_b(1) +
lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2) +
lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3) +
lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4) +
lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5) +
lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6) +
lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7) +
lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8);
p_coeff_view(3, jc, jk, jb) =
lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0) +
lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1) +
lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2) +
lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3) +
lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4) +
lsq_pseudoinv_view(jc, 2, 5, jb) * z_b(5) +
lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6) +
lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7) +
lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8);
p_coeff_view(2, jc, jk, jb) =
lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0) +
lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1) +
lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2) +
lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3) +
lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4) +
lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5) +
lsq_pseudoinv_view(jc, 1, 6, jb) * z_b(6) +
lsq_pseudoinv_view(jc, 1, 7, jb) * z_b(7) +
lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8);
p_coeff_view(1, jc, jk, jb) =
lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0) +
lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1) +
lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2) +
lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3) +
lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4) +
lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5) +
lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6) +
lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7) +
lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8);
p_coeff_view(0, jc, jk, jb) =
p_cc_view(jc, jk, jb) -
p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) -
p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) -
p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) -
p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) -
p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4) -
p_coeff_view(6, jc, jk, jb) * lsq_moments_view(jc, jb, 5) -
p_coeff_view(7, jc, jk, jb) * lsq_moments_view(jc, jb, 6) -
p_coeff_view(8, jc, jk, jb) * lsq_moments_view(jc, jb, 7) -
p_coeff_view(9, jc, jk, jb) * lsq_moments_view(jc, jb, 8);
});
}
Kokkos::fence();
}
ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD);
template <typename T>
void div3d(const T *vec_e, const int *cell_edge_idx, const int *cell_edge_blk,
const T *geofac_div, T *div_vec_c, int i_startblk, int i_endblk,
int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma,
bool lacc, int nlev, int nblks_c, int nblks_e) {
// Wrap raw pointers in unmanaged Kokkos Views.
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedConstT3D;
typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedT3D;
typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
Kokkos::MemoryUnmanaged>
UnmanagedConstInt3D;
UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e);
UnmanagedConstInt3D iidx(cell_edge_idx, nproma, nblks_c, 3);
UnmanagedConstInt3D iblk(cell_edge_blk, nproma, nblks_c, 3);
UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c);
UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c);
for (int jb = i_startblk; jb < i_endblk; ++jb) {
int i_startidx, i_endidx;
get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
i_endblk, i_startidx, i_endidx);
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
{elev, i_endidx});
Kokkos::parallel_for(
"div3d_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) {
div_vec_c_view(jc, jk, jb) =
vec_e_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) *
geofac_div_view(jc, 0, jb) +
vec_e_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) *
geofac_div_view(jc, 1, jb) +
vec_e_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) *
geofac_div_view(jc, 2, jb);
});
}
}
ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV3D);
template <typename T>
void div3d_2field(const T *vec_e, const int *cell_edge_idx,
const int *cell_edge_blk, const T *geofac_div, T *div_vec_c,
const T *in2, T *out2, int i_startblk, int i_endblk,
int i_startidx_in, int i_endidx_in, int slev, int elev,
int nproma, bool lacc, int nlev, int nblks_c, int nblks_e) {
// Wrap raw pointers in unmanaged Kokkos Views.
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedConstT3D;
typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedT3D;
typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
Kokkos::MemoryUnmanaged>
UnmanagedConstInt3D;
UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e);
UnmanagedConstInt3D iidx(cell_edge_idx, nproma, nblks_c, 3);
UnmanagedConstInt3D iblk(cell_edge_blk, nproma, nblks_c, 3);
UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c);
UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c);
UnmanagedConstT3D in2_view(in2, nproma, nlev, nblks_e);
UnmanagedT3D out2_view(out2, nproma, nlev, nblks_c);
for (int jb = i_startblk; jb < i_endblk; ++jb) {
int i_startidx, i_endidx;
get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
i_endblk, i_startidx, i_endidx);
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
{elev, i_endidx});
Kokkos::parallel_for(
"div3d_2field_inner", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
div_vec_c_view(jc, jk, jb) =
vec_e_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) *
geofac_div_view(jc, 0, jb) +
vec_e_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) *
geofac_div_view(jc, 1, jb) +
vec_e_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) *
geofac_div_view(jc, 2, jb);
out2_view(jc, jk, jb) =
in2_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) *
geofac_div_view(jc, 0, jb) +
in2_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) *
geofac_div_view(jc, 1, jb) +
in2_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) *
geofac_div_view(jc, 2, jb);
});
}
}
ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV3D_2FIELD);
template <typename T>
void div4d(const int *cell_edge_idx, const int *cell_edge_blk,
const T *geofac_div, const T *f4din, T *f4dout, int dim4d,
int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
const int *slev, const int *elev, int nproma, bool lacc, int nlev,
int nblks_c, int nblks_e) {
// Wrap raw pointers in unmanaged Kokkos Views.
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedConstT3D;
typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
Kokkos::MemoryUnmanaged>
UnmanagedConstT4D;
typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedT3D;
typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedT4D;
typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
Kokkos::MemoryUnmanaged>
UnmanagedConstInt3D;
UnmanagedConstInt3D iidx(cell_edge_idx, nproma, nblks_c, 3);
UnmanagedConstInt3D iblk(cell_edge_blk, nproma, nblks_c, 3);
UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c);
UnmanagedConstT4D f4din_view(f4din, nproma, nlev, nblks_e, dim4d);
UnmanagedT4D f4dout_view(f4dout, nproma, nlev, nblks_c, dim4d);
for (int jb = i_startblk; jb < i_endblk; ++jb) {
int i_startidx, i_endidx;
get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
i_endblk, i_startidx, i_endidx);
for (int ji = 0; ji < dim4d; ++ji) {
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev[ji], i_startidx},
{elev[ji], i_endidx});
Kokkos::parallel_for(
"div4d_inner", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
f4dout_view(jc, jk, jb, ji) =
f4din_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0), ji) *
geofac_div_view(jc, 0, jb) +
f4din_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1), ji) *
geofac_div_view(jc, 1, jb) +
f4din_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2), ji) *
geofac_div_view(jc, 2, jb);
});
}
}
}
ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV4D);
template <typename T>
void div_avg(const T *vec_e, const int *cell_neighbor_idx,
const int *cell_neighbor_blk, const int *cell_edge_idx,
const int *cell_edge_blk, const T *geofac_div, const T *avg_coeff,
T *div_vec_c, const T *opt_in2, T *opt_out2,
const int *i_startblk_in, const int *i_endblk_in,
const int *i_startidx_in, const int *i_endidx_in, int slev,
int elev, int nproma, int patch_id, bool l_limited_area,
bool l2fields, bool lacc, int nlev, int nblks_c, int nblks_e) {
// Wrap raw pointers in unmanaged Kokkos Views.
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedConstT3D;
typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedT3D;
typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
Kokkos::MemoryUnmanaged>
UnmanagedConstInt3D;
UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e);
UnmanagedConstInt3D inidx(cell_neighbor_idx, nproma, nblks_c, 3);
UnmanagedConstInt3D inblk(cell_neighbor_blk, nproma, nblks_c, 3);
UnmanagedConstInt3D ieidx(cell_edge_idx, nproma, nblks_c, 3);
UnmanagedConstInt3D ieblk(cell_edge_blk, nproma, nblks_c, 3);
UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 4, nblks_e);
UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma, nlev, nblks_c);
UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c);
UnmanagedConstT3D opt_in2_view(opt_in2, nproma, nlev, nblks_e);
UnmanagedT3D opt_out2_view(opt_out2, nproma, nlev, nblks_c);
Kokkos::View<T ***> aux_c("aux_c", nproma, nlev, nblks_c);
Kokkos::View<T ***> aux_c2("aux_c2", nproma, nlev, nblks_c);
int i_startblk = i_startblk_in[0];
int i_endblk = i_endblk_in[0];
if (l2fields) {
for (int jb = i_startblk; jb < i_endblk; ++jb) {
int i_startidx, i_endidx;
get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb,
i_startblk, i_endblk, i_startidx, i_endidx);
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
{elev, i_endidx});
Kokkos::parallel_for(
"div_avg_step1", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
aux_c(jc, jk, jb) =
vec_e_view(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) *
geofac_div_view(jc, 0, jb) +
vec_e_view(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) *
geofac_div_view(jc, 1, jb) +
vec_e_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) *
geofac_div_view(jc, 2, jb);
aux_c2(jc, jk, jb) =
opt_in2_view(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) *
geofac_div_view(jc, 0, jb) +
opt_in2_view(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) *
geofac_div_view(jc, 1, jb) +
opt_in2_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) *
geofac_div_view(jc, 2, jb);
});
}
} else {
for (int jb = i_startblk; jb < i_endblk; ++jb) {
int i_startidx, i_endidx;
get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb,
i_startblk, i_endblk, i_startidx, i_endidx);
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
{elev, i_endidx});
Kokkos::parallel_for(
"div_avg_step2", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
aux_c(jc, jk, jb) =
vec_e_view(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) *
geofac_div_view(jc, 0, jb) +
vec_e_view(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) *
geofac_div_view(jc, 1, jb) +
vec_e_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) *
geofac_div_view(jc, 2, jb);
});
}
}
if (patch_id > 0 || l_limited_area) {
i_startblk = i_startblk_in[1];
i_endblk = i_endblk_in[1];
for (int jb = i_startblk; jb < i_endblk; ++jb) {
int i_startidx, i_endidx;
get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb,
i_startblk, i_endblk, i_startidx, i_endidx);
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
{elev, i_endidx});
Kokkos::parallel_for(
"div_avg_step3", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
div_vec_c_view(jc, jk, jb) = aux_c(jc, jk, jb);
});
}
if (l2fields) {
for (int jb = i_startblk; jb < i_endblk; ++jb) {
int i_startidx, i_endidx;
get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb,
i_startblk, i_endblk, i_startidx, i_endidx);
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
{elev, i_endidx});
Kokkos::parallel_for(
"div_avg_step4", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
opt_out2_view(jc, jk, jb) = aux_c2(jc, jk, jb);
});
}
}
}
i_startblk = i_startblk_in[2];
i_endblk = i_endblk_in[2];
if (l2fields) {
for (int jb = i_startblk; jb < i_endblk; ++jb) {
int i_startidx, i_endidx;
get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb,
i_startblk, i_endblk, i_startidx, i_endidx);
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
{elev, i_endidx});
Kokkos::parallel_for(
"div_avg_step5", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
div_vec_c_view(jc, jk, jb) =
aux_c(jc, jk, jb) * avg_coeff_view(jc, 0, jb) +
aux_c(inidx(jc, jb, 0), jk, inblk(jc, jb, 0)) *
avg_coeff_view(jc, 1, jb) +
aux_c(inidx(jc, jb, 1), jk, inblk(jc, jb, 1)) *
avg_coeff_view(jc, 2, jb) +
aux_c(inidx(jc, jb, 2), jk, inblk(jc, jb, 2)) *
avg_coeff_view(jc, 3, jb);
opt_out2_view(jc, jk, jb) =
aux_c2(jc, jk, jb) * avg_coeff_view(jc, 0, jb) +
aux_c2(inidx(jc, jb, 0), jk, inblk(jc, jb, 0)) *
avg_coeff_view(jc, 1, jb) +
aux_c2(inidx(jc, jb, 1), jk, inblk(jc, jb, 1)) *
avg_coeff_view(jc, 2, jb) +
aux_c2(inidx(jc, jb, 2), jk, inblk(jc, jb, 2)) *
avg_coeff_view(jc, 3, jb);
});
}
} else {
for (int jb = i_startblk; jb < i_endblk; ++jb) {
int i_startidx, i_endidx;
get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb,
i_startblk, i_endblk, i_startidx, i_endidx);
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
{elev, i_endidx});
Kokkos::parallel_for(
"div_avg_step6", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
div_vec_c_view(jc, jk, jb) =
aux_c(jc, jk, jb) * avg_coeff_view(jc, 0, jb) +
aux_c(inidx(jc, jb, 0), jk, inblk(jc, jb, 0)) *
avg_coeff_view(jc, 1, jb) +
aux_c(inidx(jc, jb, 1), jk, inblk(jc, jb, 1)) *
avg_coeff_view(jc, 2, jb) +
aux_c(inidx(jc, jb, 2), jk, inblk(jc, jb, 2)) *
avg_coeff_view(jc, 3, jb);
});
}
}
}
ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV_AVG);
template <typename T>
void rot_vertex_atmos(const T *vec_e, const int *vert_edge_idx,
const int *vert_edge_blk, const T *geofac_rot, T *rot_vec,
int i_startblk, int i_endblk, int i_startidx_in,
int i_endidx_in, int slev, int elev, int nproma,
bool lacc, int nlev, int nblks_e, int nblks_v) {
// Wrap raw pointers in unmanaged Kokkos Views.
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedConstT3D;
typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedT3D;
typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
Kokkos::MemoryUnmanaged>
UnmanagedConstInt3D;
UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e);
UnmanagedConstInt3D iidx(vert_edge_idx, nproma, nblks_v, 6);
UnmanagedConstInt3D iblk(vert_edge_blk, nproma, nblks_v, 6);
UnmanagedConstT3D geofac_rot_view(geofac_rot, nproma, 6, nblks_v);
UnmanagedT3D rot_vec_view(rot_vec, nproma, nlev, nblks_v);
for (int jb = i_startblk; jb < i_endblk; ++jb) {
int i_startidx, i_endidx;
get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
i_endblk, i_startidx, i_endidx);
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
{elev, i_endidx});
Kokkos::parallel_for(
"rot_vertex_atmos_inner", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jv) {
rot_vec_view(jv, jk, jb) =
vec_e_view(iidx(jv, jb, 0), jk, iblk(jv, jb, 0)) *
geofac_rot_view(jv, 0, jb) +
vec_e_view(iidx(jv, jb, 1), jk, iblk(jv, jb, 1)) *
geofac_rot_view(jv, 1, jb) +
vec_e_view(iidx(jv, jb, 2), jk, iblk(jv, jb, 2)) *
geofac_rot_view(jv, 2, jb) +
vec_e_view(iidx(jv, jb, 3), jk, iblk(jv, jb, 3)) *
geofac_rot_view(jv, 3, jb) +
vec_e_view(iidx(jv, jb, 4), jk, iblk(jv, jb, 4)) *
geofac_rot_view(jv, 4, jb) +
vec_e_view(iidx(jv, jb, 5), jk, iblk(jv, jb, 5)) *
geofac_rot_view(jv, 5, jb);
});
}
}
ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_ROT_VERTEX_ATMOS);
template <typename T>
void rot_vertex_ri(const T *vec_e, const int *vert_edge_idx,
const int *vert_edge_blk, const T *geofac_rot, T *rot_vec,
int i_startblk, int i_endblk, int i_startidx_in,
int i_endidx_in, int slev, int elev, int nproma, bool lacc,
bool acc_async, int nlev, int nblks_e, int nblks_v) {
// Wrap raw pointers in unmanaged Kokkos Views.
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedConstT3D;
typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
UnmanagedT3D;
typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
Kokkos::MemoryUnmanaged>
UnmanagedConstInt3D;
UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e);
UnmanagedConstInt3D iidx(vert_edge_idx, nproma, nblks_v, 6);
UnmanagedConstInt3D iblk(vert_edge_blk, nproma, nblks_v, 6);
UnmanagedConstT3D geofac_rot_view(geofac_rot, nproma, 6, nblks_v);
UnmanagedT3D rot_vec_view(rot_vec, nproma, nlev, nblks_v);
for (int jb = i_startblk; jb < i_endblk; ++jb) {
int i_startidx, i_endidx;
get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
i_endblk, i_startidx, i_endidx);
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
{elev, i_endidx});
Kokkos::parallel_for(
"rot_vertex_atmos_inner", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jv) {
rot_vec_view(jv, jk, jb) =
vec_e_view(iidx(jv, jb, 0), jk, iblk(jv, jb, 0)) *
geofac_rot_view(jv, 0, jb) +
vec_e_view(iidx(jv, jb, 1), jk, iblk(jv, jb, 1)) *
geofac_rot_view(jv, 1, jb) +
vec_e_view(iidx(jv, jb, 2), jk, iblk(jv, jb, 2)) *
geofac_rot_view(jv, 2, jb) +
vec_e_view(iidx(jv, jb, 3), jk, iblk(jv, jb, 3)) *
geofac_rot_view(jv, 3, jb) +
vec_e_view(iidx(jv, jb, 4), jk, iblk(jv, jb, 4)) *
geofac_rot_view(jv, 4, jb) +
vec_e_view(iidx(jv, jb, 5), jk, iblk(jv, jb, 5)) *
geofac_rot_view(jv, 5, jb);
});
}
if (!acc_async)
Kokkos::fence();
}
ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_ROT_VERTEX_RI);
// ICON
//
// ---------------------------------------------------------------
// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
// Contact information: icon-model.org
//
// See AUTHORS.TXT for a list of authors
// See LICENSES/ for license information
// SPDX-License-Identifier: BSD-3-Clause
// ---------------------------------------------------------------
#pragma once
#include <Kokkos_Core.hpp>
#include <types.hpp>
#define ICONMATH_DECLARE_RECON_LSQ_CELL_L(_type) \
void recon_lsq_cell_l( \
const _type *p_cc, const int *cell_neighbor_idx, \
const int *cell_neighbor_blk, const _type *lsq_qtmat_c, \
const _type *lsq_rmat_rdiag_c, const _type *lsq_rmat_utri_c, \
const _type *lsq_moments, _type *p_coeff, int i_startblk, int i_endblk, \
int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, \
bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, \
int lsq_dim_unk, int lsq_dim_c)
#define ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD(_type) \
void recon_lsq_cell_l_svd( \
const _type *p_cc, const int *cell_neighbor_idx, \
const int *cell_neighbor_blk, const _type *lsq_pseudoinv, \
const _type *lsq_moments, _type *p_coeff, int i_startblk, int i_endblk, \
int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, \
bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, \
int lsq_dim_unk, int lsq_dim_c)
#define ICONMATH_DECLARE_RECON_LSQ_CELL_Q(_type) \
void recon_lsq_cell_q( \
const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \
const _type *lsq_qtmat_c, const _type *lsq_rmat_rdiag_c, \
const _type *lsq_rmat_utri_c, const _type *lsq_moments, _type *p_coeff, \
int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \
int slev, int elev, int nproma, int patch_id, bool l_limited_area, \
bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c)
#define ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD(_type) \
void recon_lsq_cell_q_svd( \
const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \
const _type *lsq_pseudoinv, const _type *lsq_moments, _type *p_coeff, \
int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \
int slev, int elev, int nproma, int patch_id, bool l_limited_area, \
bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c)
#define ICONMATH_DECLARE_RECON_LSQ_CELL_C(_type) \
void recon_lsq_cell_c( \
const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \
const _type *lsq_qtmat_c, const _type *lsq_rmat_rdiag_c, \
const _type *lsq_rmat_utri_c, const _type *lsq_moments, _type *p_coeff, \
int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \
int slev, int elev, int nproma, int patch_id, bool l_limited_area, \
bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c)
#define ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD(_type) \
void recon_lsq_cell_c_svd( \
const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \
const _type *lsq_pseudoinv, const _type *lsq_moments, _type *p_coeff, \
int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \
int slev, int elev, int nproma, int patch_id, \
bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, \
int lsq_dim_c)
#define ICONMATH_DECLARE_DIV3D(_type) \
void div3d(const _type *vec_e, const int *cell_edge_idx, \
const int *cell_edge_blk, const _type *geofac_div, \
_type *div_vec_c, int i_startblk, int i_endblk, \
int i_startidx_in, int i_endidx_in, int slev, int elev, \
int nproma, bool lacc, int nlev, int nblks_c, int nblks_e)
#define ICONMATH_DECLARE_DIV3D_2FIELD(_type) \
void div3d_2field(const _type *vec_e, const int *cell_edge_idx, \
const int *cell_edge_blk, const _type *geofac_div, \
_type *div_vec_c, const _type *in2, _type *out2, \
int i_startblk, int i_endblk, int i_startidx_in, \
int i_endidx_in, int slev, int elev, int nproma, \
bool lacc, int nlev, int nblks_c, int nblks_e)
#define ICONMATH_DECLARE_DIV4D(_type) \
void div4d(const int *cell_edge_idx, const int *cell_edge_blk, \
const _type *geofac_div, const _type *f4din, _type *f4dout, \
int dim4d, int i_startblk, int i_endblk, int i_startidx_in, \
int i_endidx_in, const int *slev, const int *elev, int nproma, \
bool lacc, int nlev, int nblks_c, int nblks_e)
#define ICONMATH_DECLARE_DIV_AVG(_type) \
void div_avg(const _type *vec_e, const int *cell_neighbor_idx, \
const int *cell_neighbor_blk, const int *cell_edge_idx, \
const int *cell_edge_blk, const _type *geofac_div, \
const _type *avg_coeff, _type *div_vec_c, const _type *opt_in2, \
_type *opt_out2, const int *i_startblk_in, \
const int *i_endblk_in, const int *i_startidx_in, \
const int *i_endidx_in, int slev, int elev, int nproma, \
int patch_id, bool l_limited_area, bool l2fields, bool lacc, \
int nlev, int nblks_c, int nblks_e)
#define ICONMATH_DECLARE_ROT_VERTEX_ATMOS(_type) \
void rot_vertex_atmos( \
const _type *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, \
const _type *geofac_rot, _type *rot_vec, int i_startblk, int i_endblk, \
int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, \
bool lacc, int nlev, int nblks_e, int nblks_v)
#define ICONMATH_DECLARE_ROT_VERTEX_RI(_type) \
void rot_vertex_ri( \
const _type *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, \
const _type *geofac_rot, _type *rot_vec, int i_startblk, int i_endblk, \
int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, \
bool lacc, bool acc_async, int nlev, int nblks_e, int nblks_v)
// Declare as templates
template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_L(T);
template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD(T);
template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_Q(T);
template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD(T);
template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_C(T);
template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD(T);
template <typename T> ICONMATH_DECLARE_DIV3D(T);
template <typename T> ICONMATH_DECLARE_DIV3D_2FIELD(T);
template <typename T> ICONMATH_DECLARE_DIV4D(T);
template <typename T> ICONMATH_DECLARE_DIV_AVG(T);
template <typename T> ICONMATH_DECLARE_ROT_VERTEX_ATMOS(T);
template <typename T> ICONMATH_DECLARE_ROT_VERTEX_RI(T);
This diff is collapsed.