Skip to content
Snippets Groups Projects
Commit 8e9c6811 authored by Joerg Behrens
Browse files

provide simplified scenario with hierarchical parallelism

parent 66e2a427
No related branches found
No related tags found
No related merge requests found
......@@ -43,7 +43,7 @@ constexpr AllLevels all_levels;
# define inner_for(STR, START, END, LAMBDA) for(int jc=START; jc<END; ++jc) {LAMBDA(jc);};
#endif
static void validate(double* array, double *ref, int nblocks, int nlev, int nproma) {
static void validate(double* array, const double *ref, int nblocks, int nlev, int nproma) {
for (int i = 0; i < nblocks * nlev * nproma; ++i) {
//printf("check i=%d, array=%f, ref=%f\n",i, array[i] , ref[i]);
assert(array[i] == ref[i]);
......@@ -515,6 +515,32 @@ void scenario_6(double* array, int nblocks, int nlev, int nproma, bool print = t
}
#endif
void scenario_6b(double* array, int nblocks, int nlev, int nproma) {
std::cout << "scenario 6b: Adaptable Layout & Hierarchical parallelism" << std::endl;
//exit(1);
Kokkos::View<double***, Kokkos::LayoutRight, Kokkos::MemoryUnmanaged> d_view(array, nblocks, nlev, nproma);
using team_policy = Kokkos::TeamPolicy<>;
using member_type = Kokkos::TeamPolicy<>::member_type;
timer.reset();
Kokkos::parallel_for(
"blocks", team_policy(nblocks, Kokkos::AUTO), KOKKOS_LAMBDA(const member_type& teamMember) {
const int jb = teamMember.league_rank();
Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, nproma), [&](const int jc) {
update_field(d_view, jb, all_levels, jc);
});
});
Kokkos::fence();
printf("Time = %f ms\n\n", timer.seconds() * 1000);
//validate(array, nblocks, nlev, nproma);
}
#if 0
void scenario_7(double* array, int nblocks, int nlev, int nproma, bool print=true) {
......@@ -660,7 +686,7 @@ void scenario_8template(double* array, int nblocks, int nlev, int nproma) {
void openacc_calls(double* array, double *ref, int nblocks, int nlev, int nproma,
void openacc_calls(double* array, const double *ref, int nblocks, int nlev, int nproma,
std::function<void(double*, int, int, int)> func){
#if defined(DEMO_DEVICE)
#pragma acc enter data copyin(array[0:nblocks*nlev*nproma])
......@@ -702,6 +728,14 @@ void physop(double *array, int nproma, int nlev, int nblocks) {
}
/**
 * Runs one scenario on a fresh copy of the reference input.
 *
 * Copies nproma*nlev*nblocks doubles from `array_in_ref` into `array`, then
 * forwards everything to `openacc_calls` together with `array_out_ref`
 * (presumably the expected result used for validation -- confirm against
 * `openacc_calls`).
 *
 * @param array         working buffer, overwritten with the reference input
 * @param array_in_ref  reference input, at least nproma*nlev*nblocks doubles
 * @param array_out_ref reference output passed through to `openacc_calls`
 * @param nblocks       block count
 * @param nlev          level count
 * @param nproma        inner block length
 * @param scenario      callable invoked as scenario(array, nblocks, nlev, nproma)
 */
void run_scenario(double *array, const double*array_in_ref, const double *array_out_ref,
                  int nblocks, int nlev, int nproma,
                  const std::function<void(double*, int, int, int)> &scenario) {
    // Widen every factor to size_t BEFORE multiplying: the int product
    // nproma*nlev*nblocks overflows (undefined behavior) once the element
    // count exceeds INT_MAX, which would yield a bogus byte count for memcpy.
    const size_t nbytes = static_cast<size_t>(nproma) * static_cast<size_t>(nlev)
                        * static_cast<size_t>(nblocks) * sizeof(double);
    memcpy(array, array_in_ref, nbytes);
    openacc_calls(array, array_out_ref, nblocks, nlev, nproma, scenario);
}
int main() {
int64_t ncells64 = atoi(std::getenv("NCELLS"));
assert(ncells64 < INT_MAX);
......@@ -734,7 +768,9 @@ int main() {
//memcpy(array, array_in_ref, nbytes);
//scenario_1(array, nblocks, nlev, nproma);
//validate(array, array_out_ref, nblocks, nlev, nproma);
run_scenario(array, array_in_ref, array_out_ref,
nblocks, nlev, nproma,
scenario_6b);
memcpy(array, array_in_ref, nbytes);
std::function<void(double*, int, int, int)> s_1b = scenario_1b;
openacc_calls(array, array_out_ref, nblocks, nlev, nproma, s_1b);
......
......@@ -17,6 +17,7 @@ then
ncells=(5000064)
nlev=(90)
nproma=(5000064)
#nproma=$((449*29))
else
#rm -rf build
cmake -B build -S . -DMU_ARCH=x86_64 -DCMAKE_CXX_FLAGS="-O3"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment