Skip to content
Snippets Groups Projects
Commit e4c23bec authored by Joerg Behrens, committed by Georgiana Mania
Browse files

Check bounds + case: parallel over blocks

parent 9b73fcf4
No related branches found
No related tags found
1 merge request: !2 "Check bounds + case: parallel over blocks"
#include <iostream>
#include <Kokkos_Core.hpp>
#include "Kokkos_Timer.hpp"
#include <cassert>
using space_t = Kokkos::DefaultExecutionSpace::memory_space;
typedef Kokkos::MDRangePolicy<Kokkos::DefaultExecutionSpace, Kokkos::IndexType<int>, Kokkos::Rank<3>> md_range_policy;
......@@ -12,12 +13,21 @@ typedef Kokkos::MDRangePolicy<Kokkos::DefaultExecutionSpace, Kokkos::IndexType<i
//constexpr int nlev = 90;
//constexpr int nproma = 55000;
//#define ENABLE_CHECK_BOUNDS
// Verifies that a flat buffer holds the sequence 0, 1, 2, ... in index order.
//
// array                 : buffer holding at least nblocks * nlev * nproma doubles.
// nblocks, nlev, nproma : extents; their product is the number of elements checked.
//
// Every element is compared against its own flat index with assert(), so the
// check is compiled out when NDEBUG is defined.
static void validate(double* array, int nblocks, int nlev, int nproma) {
    // Form the element count in 64-bit arithmetic: the product of three ints
    // can exceed INT_MAX, and signed integer overflow is undefined behaviour.
    const long long total = static_cast<long long>(nblocks) * nlev * nproma;
    for (long long idx = 0; idx < total; ++idx) {
        assert(array[idx] == static_cast<double>(idx));
    }
}
// Optional index check for a rank-3 access.
//
// i1, i2, i3 : indices about to be used.
// n1, n2, n3 : matching extents; each index must satisfy 0 <= i < n.
//
// The check is active only when ENABLE_CHECK_BOUNDS is defined (and assert()
// is not disabled by NDEBUG); otherwise the function is a no-op.
//
// This helper is called from inside KOKKOS_LAMBDA kernels, so it carries the
// Kokkos function annotation to be callable on device backends as well as on
// the host. The plain `inline` fallback only applies when the Kokkos macro is
// not available.
#ifdef KOKKOS_INLINE_FUNCTION
KOKKOS_INLINE_FUNCTION
#else
inline
#endif
void check_bounds(int i1, int i2, int i3, int n1, int n2, int n3) {
#ifdef ENABLE_CHECK_BOUNDS
    // One assert per dimension so a failure names the offending index.
    assert(i1 >= 0 && i1 < n1);
    assert(i2 >= 0 && i2 < n2);
    assert(i3 >= 0 && i3 < n3);
#else
    // Checks disabled: mark the parameters as used to avoid compiler warnings.
    (void)i1; (void)i2; (void)i3;
    (void)n1; (void)n2; (void)n3;
#endif
}
void scenario_1(double* array, int nblocks, int nlev, int nproma, bool print=true) {
if(print)
std::cout << "Default layout; view(array, nblocks, nlev, nproma); d_view(jb, jk, jc) ----- " << std::endl;
......@@ -33,6 +43,7 @@ void scenario_1(double* array, int nblocks, int nlev, int nproma, bool print=tru
Kokkos::parallel_for("", Kokkos::RangePolicy<>(0, nproma), KOKKOS_LAMBDA (const int jc) {
for (int jk = 0; jk < nlev; ++jk) {
int p = jb * nlev * nproma + jk * nproma + jc;
check_bounds(jb,jk,jc, d_view.extent(0), d_view.extent(1), d_view.extent(2));
d_view(jb, jk, jc) = p;
}});
......@@ -59,6 +70,7 @@ void scenario_2(double* array, int nblocks, int nlev, int nproma, bool print=tru
Kokkos::parallel_for("", Kokkos::RangePolicy<>(0, nproma), KOKKOS_LAMBDA (const int jc) {
for (int jk = 0; jk < nlev; ++jk) {
int p = jc * nlev * nblocks + jk * nblocks + jb;
check_bounds(jc,jk,jb, d_view.extent(0), d_view.extent(1), d_view.extent(2));
d_view(jc, jk, jb) = p;
// printf("%f ", d_view(jb, jk, jc));
......@@ -87,6 +99,7 @@ void scenario_2b(double* array, int nblocks, int nlev, int nproma, bool print=tr
Kokkos::parallel_for("", Kokkos::RangePolicy<>(0, nproma), KOKKOS_LAMBDA (const int jc) {
for (int jk = 0; jk < nlev; ++jk) {
int p = jb * nlev * nproma + jk * nproma + jc;
check_bounds(jb,jk,jc, d_view.extent(0), d_view.extent(1), d_view.extent(2));
d_view(jb, jk, jc) = p;
// printf("%f ", d_view(jb, jk, jc));
......@@ -100,6 +113,7 @@ void scenario_2b(double* array, int nblocks, int nlev, int nproma, bool print=tr
}
// slow on CPU
void scenario_3(double* array, int nblocks, int nlev, int nproma, bool print=true) {
if(print)
......@@ -115,6 +129,7 @@ void scenario_3(double* array, int nblocks, int nlev, int nproma, bool print=tru
Kokkos::parallel_for("", Kokkos::RangePolicy<>(0, nproma), KOKKOS_LAMBDA (const int jc) {
for (int jk = 0; jk < nlev; ++jk) {
int p = jb * nlev * nproma + jk * nproma + jc;
check_bounds(jc,jk,jb, d_view.extent(0), d_view.extent(1), d_view.extent(2));
d_view(jc, jk, jb) = p;
// printf("%f ", d_view(jb, jk, jc));
......@@ -143,6 +158,7 @@ void scenario_4(double* array, int nblocks, int nlev, int nproma, bool print=tru
Kokkos::parallel_for("", Kokkos::RangePolicy<>(0, nproma), KOKKOS_LAMBDA (const int jc) {
for (int jk = 0; jk < nlev; ++jk) {
int p = jc * nlev * nblocks + jk * nblocks + jb;
check_bounds(jc,jk,jb, d_view.extent(0), d_view.extent(1), d_view.extent(2));
d_view(jc, jk, jb) = p;
// printf("%f ", d_view(jb, jk, jc));
......@@ -186,6 +202,35 @@ void scenario_5(double* array, int nblocks, int nlev, int nproma, bool print=tru
}
// Scenario 6: default-layout view of shape (nblocks, nlev, nproma), filled by a
// kernel that is parallel over blocks (jb) with serial loops over jc and jk
// inside each work item.
//
// array                 : host buffer of nblocks * nlev * nproma doubles; it is
//                         wrapped unmanaged, mirrored to the default execution
//                         space's memory space, overwritten by the kernel and
//                         copied back.
// nblocks, nlev, nproma : extents of the three view dimensions.
// print                 : when true, prints the scenario header and elapsed time.
//
// After the copy back, validate() checks that array[i] == i for every element.
//
// NOTE(review): `timer` is not declared in this function; it is assumed to be a
// Kokkos timer object visible from an enclosing scope - confirm.
// NOTE(review): the header text is identical to scenario_1's, so the two are
// indistinguishable in the program output.
void scenario_6(double* array, int nblocks, int nlev, int nproma, bool print=true) {
    if (print) {
        std::cout << "Default layout; view(array, nblocks, nlev, nproma); d_view(jb, jk, jc) ----- " << std::endl;
    }

    // Wrap the caller's buffer without taking ownership, then create a copy in
    // the memory space of the default execution space (file-scope space_t).
    Kokkos::View<double***, Kokkos::HostSpace, Kokkos::MemoryUnmanaged> view(array, nblocks, nlev, nproma);
    auto d_view = Kokkos::create_mirror_view_and_copy(space_t(), view);

    timer.reset();
    Kokkos::parallel_for("", Kokkos::RangePolicy<>(0, nblocks), KOKKOS_LAMBDA (const int jb) {
        for (int jc = 0; jc < nproma; ++jc) {
            for (int jk = 0; jk < nlev; ++jk) {
                // Flat row-major index of (jb, jk, jc); this is the value validate() expects.
                int p = jb * nlev * nproma + jk * nproma + jc;
                check_bounds(jb, jk, jc, d_view.extent(0), d_view.extent(1), d_view.extent(2));
                d_view(jb, jk, jc) = p;
            }
        }
    });
    // Kernel launches may return before the work has finished; wait for
    // completion so the timer measures the kernel and not just its launch.
    Kokkos::fence();

    if (print) {
        printf("Time = %f ms\n\n", timer.seconds() * 1000);
    }

    Kokkos::deep_copy(view, d_view);
    validate(array, nblocks, nlev, nproma);
}
int main() {
......@@ -230,6 +275,7 @@ int main() {
scenario_3(array, nblocks, nlev, nproma);
scenario_4(array, nblocks, nlev, nproma);
scenario_5(array, nblocks, nlev, nproma);
scenario_6(array, nblocks, nlev, nproma);
}
Kokkos::finalize();
......@@ -259,4 +305,4 @@ int main() {
// printf("%f ", d_view(jb, jk, jc));
}});
*/
\ No newline at end of file
*/
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment