Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • k202174/demo
1 result
Show changes
Commits on Source (3)
......@@ -43,7 +43,7 @@ constexpr AllLevels all_levels;
# define inner_for(STR, START, END, LAMBDA) for(int jc=START; jc<END; ++jc) {LAMBDA(jc);};
#endif
static void validate(double* array, double *ref, int nblocks, int nlev, int nproma) {
static void validate(double* array, const double *ref, int nblocks, int nlev, int nproma) {
for (int i = 0; i < nblocks * nlev * nproma; ++i) {
//printf("check i=%d, array=%f, ref=%f\n",i, array[i] , ref[i]);
assert(array[i] == ref[i]);
......@@ -94,14 +94,24 @@ void show_view(const std::string &label, const ViewType &view, const bool with_v
Kokkos::fence();
}
// Applies the vertical stencil in place to a single column.
//
// v is indexed with one index, v(k), and its level count is v.extent(0).
// Levels are processed in ascending order, so at level k the lower neighbour
// v(k-1) already holds its updated value while the upper neighbour v(k+1)
// still holds its previous value:
//   k == 0          : v(k) += v(k+1)/10000
//   0 < k < nlev-1  : v(k) += v(k-1)/100 + v(k+1)/10000
//   k == nlev-1     : v(k) += v(k-1)/100
// Columns with fewer than two levels are left untouched.
template<typename ViewType>
inline HOST_DEVICE_ATTRIBUTES void update_column(ViewType &v) {
  const int nlev = v.extent(0);
  // Without at least two levels there is no neighbour to read; the boundary
  // statements below would otherwise access v(1) or v(-1) out of bounds.
  if (nlev < 2) {
    return;
  }
  const int top = nlev - 1;
  // Lowest level: only an upper neighbour exists.
  v(0) += v(1)/10000;
  // Interior levels: contribution from both neighbours.
  for (int k = 1; k < top; ++k) {
    v(k) += v(k-1)/100 + v(k+1)/10000;
  }
  // Highest level: only a lower neighbour exists.
  v(top) += v(top-1)/100;
}
// Updates the column (i0, :, i2) of the three-index view v in place.
//
// v    view indexed as v(i0, k, i2); the level count is v.extent(1)
// i0   first index of the column, checked against v.extent(0) via check_bounds
// all  not used in the body; presumably a tag that selects this "all levels"
//      overload -- TODO confirm against the other overloads
// i2   third index of the column, checked against v.extent(2) via check_bounds
//
// NOTE(review): as this text stands, the explicit interior loop AND the
// update_column() call below both run on the same column, so the stencil is
// applied twice to the interior levels. This looks like a removed and an added
// diff line shown together -- confirm which of the two is meant to remain.
template<typename ViewType>
inline HOST_DEVICE_ATTRIBUTES void update_field(ViewType &v, int i0, AllLevels all, int i2) {
const int nlev = v.extent(1);
check_bounds(i0, i2, v.extent(0), v.extent(2));
// Explicit in-place update of the interior levels 1 .. nlev-2.
for (int k=1; k < nlev-1; ++k) {
v(i0,k,i2) = v(i0,k,i2) + v(i0,k-1,i2)/100 + v(i0,k+1,i2)/10000;
}
// Take a one-index view of the column and hand it to update_column.
auto column = subview(v,i0,Kokkos::ALL, i2); // why can't we use the RHS as argument to update_column directly?
update_column(column);
}
void scenario_1(double* array, int nblocks, int nlev, int nproma, bool print = true) {
......@@ -178,10 +188,8 @@ void scenario_1c(double* array, int nblocks, int nlev, int nproma) {
Kokkos::parallel_for(
"", Kokkos::RangePolicy<>(0, ncells), KOKKOS_LAMBDA(const int jc) {
check_bounds(jc, nlev-2, d_view2d.extent(0), d_view2d.extent(1));
for (int jk = 1; jk < nlev-1; ++jk) {
//printf("jc=%d, jk=%d, lhs=%f, rhs1=%f, rhs2=%f\n", jc, jk,d_view2d(jc,jk), d_view2d(jc,jk-1), d_view2d(jc,jk+1) );
d_view2d(jc,jk) = d_view2d(jc,jk) + d_view2d(jc,jk-1)/100 + d_view2d(jc,jk+1)/10000;
}
auto column = subview(d_view2d,jc,Kokkos::ALL);
update_column(column);
});
Kokkos::fence();
printf("Time = %f ms\n", timer.seconds() * 1000);
......@@ -234,10 +242,8 @@ void scenario_1cmacro(double* array, int nblocks, int nlev, int nproma) {
Kokkos::parallel_for(
"", Kokkos::RangePolicy<>(0, ncells), KOKKOS_LAMBDA(const int jc) {
check_bounds(jc, nlev-2, d_view.extent(0), d_view.extent(1));
for (int jk = 1; jk < nlev-1; ++jk) {
//printf("jc=%d, jk=%d, lhs=%f, rhs1=%f, rhs2=%f\n", jc, jk,d_view(jc,jk), d_view(jc,jk-1), d_view(jc,jk+1) );
d_view(jc,jk) = d_view(jc,jk) + d_view(jc,jk-1)/100 + d_view(jc,jk+1)/10000;
}
auto column = subview(d_view,jc,Kokkos::ALL);
update_column(column);
});
Kokkos::fence();
......@@ -515,6 +521,32 @@ void scenario_6(double* array, int nblocks, int nlev, int nproma, bool print = t
}
#endif
void scenario_6b(double* array, int nblocks, int nlev, int nproma) {
std::cout << "scenario 6b: Adaptable Layout & Hierarchical parallelism" << std::endl;
//exit(1);
Kokkos::View<double***, Kokkos::LayoutRight, Kokkos::MemoryUnmanaged> d_view(array, nblocks, nlev, nproma);
using team_policy = Kokkos::TeamPolicy<>;
using member_type = Kokkos::TeamPolicy<>::member_type;
timer.reset();
Kokkos::parallel_for(
"blocks", team_policy(nblocks, Kokkos::AUTO), KOKKOS_LAMBDA(const member_type& teamMember) {
const int jb = teamMember.league_rank();
Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, nproma), [&](const int jc) {
update_field(d_view, jb, all_levels, jc);
});
});
Kokkos::fence();
printf("Time = %f ms\n\n", timer.seconds() * 1000);
//validate(array, nblocks, nlev, nproma);
}
#if 0
void scenario_7(double* array, int nblocks, int nlev, int nproma, bool print=true) {
......@@ -660,7 +692,7 @@ void scenario_8template(double* array, int nblocks, int nlev, int nproma) {
void openacc_calls(double* array, double *ref, int nblocks, int nlev, int nproma,
void openacc_calls(double* array, const double *ref, int nblocks, int nlev, int nproma,
std::function<void(double*, int, int, int)> func){
#if defined(DEMO_DEVICE)
#pragma acc enter data copyin(array[0:nblocks*nlev*nproma])
......@@ -702,6 +734,14 @@ void physop(double *array, int nproma, int nlev, int nblocks) {
}
// Runs one scenario starting from a fresh copy of the reference input.
//
// array          working buffer; overwritten with the contents of array_in_ref
//                before the scenario is invoked
// array_in_ref   reference input; nproma*nlev*nblocks doubles are copied from it
// array_out_ref  reference output, forwarded unchanged to openacc_calls
// nblocks, nlev, nproma
//                extents; used for the copy size and forwarded to openacc_calls
// scenario       callable forwarded to openacc_calls
void run_scenario(double *array, const double*array_in_ref, const double *array_out_ref,
                  int nblocks, int nlev, int nproma,
                  const std::function<void(double*, int, int, int)> &scenario) {
  // Widen the first factor so the whole product is evaluated in size_t.
  // Multiplying the three ints first could overflow int for large grids.
  const size_t nbytes = static_cast<size_t>(nproma) * nlev * nblocks * sizeof(double);
  memcpy(array, array_in_ref, nbytes);
  openacc_calls(array, array_out_ref, nblocks, nlev, nproma, scenario);
}
int main() {
int64_t ncells64 = atoi(std::getenv("NCELLS"));
assert(ncells64 < INT_MAX);
......@@ -734,7 +774,9 @@ int main() {
//memcpy(array, array_in_ref, nbytes);
//scenario_1(array, nblocks, nlev, nproma);
//validate(array, array_out_ref, nblocks, nlev, nproma);
run_scenario(array, array_in_ref, array_out_ref,
nblocks, nlev, nproma,
scenario_6b);
memcpy(array, array_in_ref, nbytes);
std::function<void(double*, int, int, int)> s_1b = scenario_1b;
openacc_calls(array, array_out_ref, nblocks, nlev, nproma, s_1b);
......
......@@ -17,6 +17,7 @@ then
ncells=(5000064)
nlev=(90)
nproma=(5000064)
#nproma=$((449*29))
else
#rm -rf build
cmake -B build -S . -DMU_ARCH=x86_64 -DCMAKE_CXX_FLAGS="-O3"
......