Skip to content
Snippets Groups Projects

Draft: Do dot product with Kokkos subview

Open Yen-Chen Chen requested to merge subview_inner_product into feature-add-cpp-codes
2 files
+ 147
- 266
Compare changes
  • Side-by-side
  • Inline
Files
2
+ 146
- 266
@@ -10,11 +10,14 @@
// ---------------------------------------------------------------
#include <iostream>
#include <numeric>
#include <vector>
#include <horizontal/lib_divrot.hpp>
#include <support/mo_lib_loopindices.hpp>
namespace KE = Kokkos::Experimental;
template <typename T>
void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx,
const int *cell_neighbor_blk, const T *lsq_qtmat_c,
@@ -261,51 +264,32 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c,
Kokkos::parallel_for(
"recon_lsq_cell_q_step2", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk);
auto z_d_subview = subview(z_d, std::make_pair(0, 9), jc, jk);
auto lsq_qtmat_c_subview =
subview(lsq_qtmat_c_view, jc, 0, std::make_pair(0, 9), jb);
z_qt_times_d(0) = std::inner_product(KE::cbegin(lsq_qtmat_c_subview),
KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_d_subview), 0.0);
lsq_qtmat_c_subview =
subview(lsq_qtmat_c_view, jc, 1, std::make_pair(0, 9), jb);
z_qt_times_d(1) = std::inner_product(KE::cbegin(lsq_qtmat_c_subview),
KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_d_subview), 0.0);
lsq_qtmat_c_subview =
subview(lsq_qtmat_c_view, jc, 2, std::make_pair(0, 9), jb);
z_qt_times_d(2) = std::inner_product(KE::cbegin(lsq_qtmat_c_subview),
KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_d_subview), 0.0);
lsq_qtmat_c_subview =
subview(lsq_qtmat_c_view, jc, 3, std::make_pair(0, 9), jb);
z_qt_times_d(3) = std::inner_product(KE::cbegin(lsq_qtmat_c_subview),
KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_d_subview), 0.0);
lsq_qtmat_c_subview =
subview(lsq_qtmat_c_view, jc, 4, std::make_pair(0, 9), jb);
z_qt_times_d(4) = std::inner_product(KE::cbegin(lsq_qtmat_c_subview),
KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_d_subview), 0.0);
p_coeff_view(5, jc, jk, jb) = ptr_rrdiag(jc, 4, jb) * z_qt_times_d(4);
p_coeff_view(4, jc, jk, jb) =
@@ -420,56 +404,32 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c,
Kokkos::parallel_for(
"recon_lsq_cell_q_svd_step2", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
p_coeff_view(5, jc, jk, jb) =
lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0, jc, jk) +
lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1, jc, jk) +
lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2, jc, jk) +
lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3, jc, jk) +
lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4, jc, jk) +
lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5, jc, jk) +
lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6, jc, jk) +
lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7, jc, jk) +
lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8, jc, jk);
p_coeff_view(4, jc, jk, jb) =
lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0, jc, jk) +
lsq_pseudoinv_view(jc, 3, 1, jb) * z_b(1, jc, jk) +
lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2, jc, jk) +
lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3, jc, jk) +
lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4, jc, jk) +
lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5, jc, jk) +
lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6, jc, jk) +
lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7, jc, jk) +
lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8, jc, jk);
p_coeff_view(3, jc, jk, jb) =
lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0, jc, jk) +
lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1, jc, jk) +
lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2, jc, jk) +
lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3, jc, jk) +
lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4, jc, jk) +
lsq_pseudoinv_view(jc, 2, 5, jb) * z_b(5, jc, jk) +
lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6, jc, jk) +
lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7, jc, jk) +
lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8, jc, jk);
p_coeff_view(2, jc, jk, jb) =
lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0, jc, jk) +
lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1, jc, jk) +
lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2, jc, jk) +
lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3, jc, jk) +
lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4, jc, jk) +
lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5, jc, jk) +
lsq_pseudoinv_view(jc, 1, 6, jb) * z_b(6, jc, jk) +
lsq_pseudoinv_view(jc, 1, 7, jb) * z_b(7, jc, jk) +
lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8, jc, jk);
p_coeff_view(1, jc, jk, jb) =
lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0, jc, jk) +
lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1, jc, jk) +
lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2, jc, jk) +
lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3, jc, jk) +
lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4, jc, jk) +
lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5, jc, jk) +
lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6, jc, jk) +
lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7, jc, jk) +
lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8, jc, jk);
auto z_b_subview = subview(z_b, std::make_pair(0, 9), jc, jk);
auto lsq_pseudoinv_subview =
subview(lsq_pseudoinv_view, jc, 4, std::make_pair(0, 9), jb);
p_coeff_view(5, jc, jk, jb) = std::inner_product(
KE::cbegin(lsq_pseudoinv_subview), KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_b_subview), 0.0);
lsq_pseudoinv_subview =
subview(lsq_pseudoinv_view, jc, 3, std::make_pair(0, 9), jb);
p_coeff_view(4, jc, jk, jb) = std::inner_product(
KE::cbegin(lsq_pseudoinv_subview), KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_b_subview), 0.0);
lsq_pseudoinv_subview =
subview(lsq_pseudoinv_view, jc, 2, std::make_pair(0, 9), jb);
p_coeff_view(3, jc, jk, jb) = std::inner_product(
KE::cbegin(lsq_pseudoinv_subview), KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_b_subview), 0.0);
lsq_pseudoinv_subview =
subview(lsq_pseudoinv_view, jc, 1, std::make_pair(0, 9), jb);
p_coeff_view(2, jc, jk, jb) = std::inner_product(
KE::cbegin(lsq_pseudoinv_subview), KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_b_subview), 0.0);
lsq_pseudoinv_subview =
subview(lsq_pseudoinv_view, jc, 0, std::make_pair(0, 9), jb);
p_coeff_view(1, jc, jk, jb) = std::inner_product(
KE::cbegin(lsq_pseudoinv_subview), KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_b_subview), 0.0);
p_coeff_view(0, jc, jk, jb) =
p_cc_view(jc, jk, jb) -
p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) -
@@ -565,87 +525,52 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c,
Kokkos::parallel_for(
"recon_lsq_cell_c_step2", innerPolicy,
KOKKOS_LAMBDA(const int jk, const int jc) {
z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(5) = lsq_qtmat_c_view(jc, 5, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 5, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 5, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 5, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 5, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 5, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 5, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 5, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 5, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(6) = lsq_qtmat_c_view(jc, 6, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 6, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 6, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 6, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 6, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 6, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 6, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 6, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 6, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(7) = lsq_qtmat_c_view(jc, 7, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 7, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 7, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 7, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 7, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 7, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 7, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 7, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 7, 8, jb) * z_d(8, jc, jk);
z_qt_times_d(8) = lsq_qtmat_c_view(jc, 8, 0, jb) * z_d(0, jc, jk) +
lsq_qtmat_c_view(jc, 8, 1, jb) * z_d(1, jc, jk) +
lsq_qtmat_c_view(jc, 8, 2, jb) * z_d(2, jc, jk) +
lsq_qtmat_c_view(jc, 8, 3, jb) * z_d(3, jc, jk) +
lsq_qtmat_c_view(jc, 8, 4, jb) * z_d(4, jc, jk) +
lsq_qtmat_c_view(jc, 8, 5, jb) * z_d(5, jc, jk) +
lsq_qtmat_c_view(jc, 8, 6, jb) * z_d(6, jc, jk) +
lsq_qtmat_c_view(jc, 8, 7, jb) * z_d(7, jc, jk) +
lsq_qtmat_c_view(jc, 8, 8, jb) * z_d(8, jc, jk);
auto z_d_subview = subview(z_d, std::make_pair(0, 9), jc, jk);
auto lsq_qtmat_c_subview =
subview(lsq_qtmat_c_view, jc, 0, std::make_pair(0, 9), jb);
z_qt_times_d(0) = std::inner_product(KE::cbegin(lsq_qtmat_c_subview),
KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_d_subview), 0.0);
lsq_qtmat_c_subview =
subview(lsq_qtmat_c_view, jc, 1, std::make_pair(0, 9), jb);
z_qt_times_d(1) = std::inner_product(KE::cbegin(lsq_qtmat_c_subview),
KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_d_subview), 0.0);
lsq_qtmat_c_subview =
subview(lsq_qtmat_c_view, jc, 2, std::make_pair(0, 9), jb);
z_qt_times_d(2) = std::inner_product(KE::cbegin(lsq_qtmat_c_subview),
KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_d_subview), 0.0);
lsq_qtmat_c_subview =
subview(lsq_qtmat_c_view, jc, 3, std::make_pair(0, 9), jb);
z_qt_times_d(3) = std::inner_product(KE::cbegin(lsq_qtmat_c_subview),
KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_d_subview), 0.0);
lsq_qtmat_c_subview =
subview(lsq_qtmat_c_view, jc, 4, std::make_pair(0, 9), jb);
z_qt_times_d(4) = std::inner_product(KE::cbegin(lsq_qtmat_c_subview),
KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_d_subview), 0.0);
lsq_qtmat_c_subview =
subview(lsq_qtmat_c_view, jc, 5, std::make_pair(0, 9), jb);
z_qt_times_d(5) = std::inner_product(KE::cbegin(lsq_qtmat_c_subview),
KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_d_subview), 0.0);
lsq_qtmat_c_subview =
subview(lsq_qtmat_c_view, jc, 6, std::make_pair(0, 9), jb);
z_qt_times_d(6) = std::inner_product(KE::cbegin(lsq_qtmat_c_subview),
KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_d_subview), 0.0);
lsq_qtmat_c_subview =
subview(lsq_qtmat_c_view, jc, 7, std::make_pair(0, 9), jb);
z_qt_times_d(7) = std::inner_product(KE::cbegin(lsq_qtmat_c_subview),
KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_d_subview), 0.0);
lsq_qtmat_c_subview =
subview(lsq_qtmat_c_view, jc, 8, std::make_pair(0, 9), jb);
z_qt_times_d(8) = std::inner_product(KE::cbegin(lsq_qtmat_c_subview),
KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_d_subview), 0.0);
p_coeff_view(9, jc, jk, jb) = ptr_rrdiag(jc, 8, jb) * z_qt_times_d(8);
p_coeff_view(8, jc, jk, jb) =
@@ -805,96 +730,51 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c,
z_b(8) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) -
p_cc_view(jc, jk, jb);
p_coeff_view(9, jc, jk, jb) =
lsq_pseudoinv_view(jc, 8, 0, jb) * z_b(0) +
lsq_pseudoinv_view(jc, 8, 1, jb) * z_b(1) +
lsq_pseudoinv_view(jc, 8, 2, jb) * z_b(2) +
lsq_pseudoinv_view(jc, 8, 3, jb) * z_b(3) +
lsq_pseudoinv_view(jc, 8, 4, jb) * z_b(4) +
lsq_pseudoinv_view(jc, 8, 5, jb) * z_b(5) +
lsq_pseudoinv_view(jc, 8, 6, jb) * z_b(6) +
lsq_pseudoinv_view(jc, 8, 7, jb) * z_b(7) +
lsq_pseudoinv_view(jc, 8, 8, jb) * z_b(8);
p_coeff_view(8, jc, jk, jb) =
lsq_pseudoinv_view(jc, 7, 0, jb) * z_b(0) +
lsq_pseudoinv_view(jc, 7, 1, jb) * z_b(1) +
lsq_pseudoinv_view(jc, 7, 2, jb) * z_b(2) +
lsq_pseudoinv_view(jc, 7, 3, jb) * z_b(3) +
lsq_pseudoinv_view(jc, 7, 4, jb) * z_b(4) +
lsq_pseudoinv_view(jc, 7, 5, jb) * z_b(5) +
lsq_pseudoinv_view(jc, 7, 6, jb) * z_b(6) +
lsq_pseudoinv_view(jc, 7, 7, jb) * z_b(7) +
lsq_pseudoinv_view(jc, 7, 8, jb) * z_b(8);
p_coeff_view(7, jc, jk, jb) =
lsq_pseudoinv_view(jc, 6, 0, jb) * z_b(0) +
lsq_pseudoinv_view(jc, 6, 1, jb) * z_b(1) +
lsq_pseudoinv_view(jc, 6, 2, jb) * z_b(2) +
lsq_pseudoinv_view(jc, 6, 3, jb) * z_b(3) +
lsq_pseudoinv_view(jc, 6, 4, jb) * z_b(4) +
lsq_pseudoinv_view(jc, 6, 5, jb) * z_b(5) +
lsq_pseudoinv_view(jc, 6, 6, jb) * z_b(6) +
lsq_pseudoinv_view(jc, 6, 7, jb) * z_b(7) +
lsq_pseudoinv_view(jc, 6, 8, jb) * z_b(8);
p_coeff_view(6, jc, jk, jb) =
lsq_pseudoinv_view(jc, 5, 0, jb) * z_b(0) +
lsq_pseudoinv_view(jc, 5, 1, jb) * z_b(1) +
lsq_pseudoinv_view(jc, 5, 2, jb) * z_b(2) +
lsq_pseudoinv_view(jc, 5, 3, jb) * z_b(3) +
lsq_pseudoinv_view(jc, 5, 4, jb) * z_b(4) +
lsq_pseudoinv_view(jc, 5, 5, jb) * z_b(5) +
lsq_pseudoinv_view(jc, 5, 6, jb) * z_b(6) +
lsq_pseudoinv_view(jc, 5, 7, jb) * z_b(7) +
lsq_pseudoinv_view(jc, 5, 8, jb) * z_b(8);
p_coeff_view(5, jc, jk, jb) =
lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0) +
lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1) +
lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2) +
lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3) +
lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4) +
lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5) +
lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6) +
lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7) +
lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8);
p_coeff_view(4, jc, jk, jb) =
lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0) +
lsq_pseudoinv_view(jc, 3, 1, jb) * z_b(1) +
lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2) +
lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3) +
lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4) +
lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5) +
lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6) +
lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7) +
lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8);
p_coeff_view(3, jc, jk, jb) =
lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0) +
lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1) +
lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2) +
lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3) +
lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4) +
lsq_pseudoinv_view(jc, 2, 5, jb) * z_b(5) +
lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6) +
lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7) +
lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8);
p_coeff_view(2, jc, jk, jb) =
lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0) +
lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1) +
lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2) +
lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3) +
lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4) +
lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5) +
lsq_pseudoinv_view(jc, 1, 6, jb) * z_b(6) +
lsq_pseudoinv_view(jc, 1, 7, jb) * z_b(7) +
lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8);
p_coeff_view(1, jc, jk, jb) =
lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0) +
lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1) +
lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2) +
lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3) +
lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4) +
lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5) +
lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6) +
lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7) +
lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8);
auto lsq_pseudoinv_subview =
subview(lsq_pseudoinv_view, jc, 8, std::make_pair(0, 9), jb);
p_coeff_view(9, jc, jk, jb) = std::inner_product(
KE::cbegin(lsq_qtmat_c_subview), KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_b), 0.0);
lsq_pseudoinv_subview =
subview(lsq_pseudoinv_view, jc, 7, std::make_pair(0, 9), jb);
p_coeff_view(8, jc, jk, jb) = std::inner_product(
KE::cbegin(lsq_qtmat_c_subview), KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_b), 0.0);
lsq_pseudoinv_subview =
subview(lsq_pseudoinv_view, jc, 6, std::make_pair(0, 9), jb);
p_coeff_view(7, jc, jk, jb) = std::inner_product(
KE::cbegin(lsq_qtmat_c_subview), KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_b), 0.0);
lsq_pseudoinv_subview =
subview(lsq_pseudoinv_view, jc, 5, std::make_pair(0, 9), jb);
p_coeff_view(6, jc, jk, jb) = std::inner_product(
KE::cbegin(lsq_qtmat_c_subview), KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_b), 0.0);
lsq_pseudoinv_subview =
subview(lsq_pseudoinv_view, jc, 4, std::make_pair(0, 9), jb);
p_coeff_view(5, jc, jk, jb) = std::inner_product(
KE::cbegin(lsq_qtmat_c_subview), KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_b), 0.0);
lsq_pseudoinv_subview =
subview(lsq_pseudoinv_view, jc, 3, std::make_pair(0, 9), jb);
p_coeff_view(4, jc, jk, jb) = std::inner_product(
KE::cbegin(lsq_qtmat_c_subview), KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_b), 0.0);
lsq_pseudoinv_subview =
subview(lsq_pseudoinv_view, jc, 2, std::make_pair(0, 9), jb);
p_coeff_view(3, jc, jk, jb) = std::inner_product(
KE::cbegin(lsq_qtmat_c_subview), KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_b), 0.0);
lsq_pseudoinv_subview =
subview(lsq_pseudoinv_view, jc, 1, std::make_pair(0, 9), jb);
p_coeff_view(2, jc, jk, jb) = std::inner_product(
KE::cbegin(lsq_qtmat_c_subview), KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_b), 0.0);
lsq_pseudoinv_subview =
subview(lsq_pseudoinv_view, jc, 0, std::make_pair(0, 9), jb);
p_coeff_view(1, jc, jk, jb) = std::inner_product(
KE::cbegin(lsq_qtmat_c_subview), KE::cend(lsq_qtmat_c_subview),
KE::cbegin(z_b), 0.0);
p_coeff_view(0, jc, jk, jb) =
p_cc_view(jc, jk, jb) -
p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) -
Loading