Commit 7beaafe1 authored by Hendryk Bockelmann
Browse files

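Fix bug in get_sp for single-MPI-task runs: pass proc_choice and compare it against SCT_REDUCE_ALL instead of testing the reduced process count against 1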

parent d02ff6a9
......@@ -228,7 +228,8 @@ void sct_create_global_timer_map(sct_context_type *con) {
for (int it=0; it<global_timer_num; it++)
fprintf(stderr,"[%d] post %d -> %s\n",pid,it,timer_list[it]);
for (int it=0; it<timer_num; it++)
fprintf(stderr,"[%d] map %d -> %d (%s)\n",pid,it,timer_map[it],timer_list[timer_map[it]]);
fprintf(stderr,"[%d] map %d -> %d (%s)\n", pid,
it, timer_map[it], timer_list[timer_map[it]]);
}
MPI_Type_free(&mpi_string);
......@@ -298,7 +299,7 @@ static void accumulate_stats(sct_stats_type *x, sct_stats_type *acc, int en) {
}
static void get_sp(sct_context_type *con, int proc_num, int sp_num, sct_stats_type *sp_stats_mem) {
static void get_sp(sct_context_type *con, int proc_choice, int sp_num, sct_stats_type *sp_stats_mem) {
// copy the timer-stats from (possibly) thread-private memory to shared memory
......@@ -308,10 +309,12 @@ static void get_sp(sct_context_type *con, int proc_num, int sp_num, sct_stats_ty
const int timer_num = sct_get_timer_num();
int timer_idx;
// serial phase:
// serial phase will be stored at the very end of sp_stats such that
// sp_stats[0...nb_timer-1][0...nb_threads-1] for parallel executed time
// sp_stats[0...nb_timer-1][nb_threads] for serial executed time
if (sp_num<1) sct_abort("get_sp: (sp_num<1)", __FILE__, __LINE__);
for (int it = 0; it < timer_num; it++) {
if (proc_num == 1) // SCT_REDUCE_ALL
if (proc_choice == SCT_REDUCE_ALL)
timer_idx = sct_get_global_idx(it);
else
timer_idx = it;
......@@ -335,7 +338,7 @@ static void get_sp(sct_context_type *con, int proc_num, int sp_num, sct_stats_ty
#endif
{
for (int it = 0; it < timer_num; it++) {
if (proc_num == 1) // SCT_REDUCE_ALL
if (proc_choice == SCT_REDUCE_ALL)
timer_idx = sct_get_global_idx(it);
else
timer_idx = it;
......@@ -1074,7 +1077,7 @@ sct_reduction_type *sct_reduction_new(int context_choice, int proc_choice, int t
int c=0;
for (int ip=0; ip<proc_num; ip++) {
for (int it=0; it<res->timer_num_per_rank[ip]; it++) {
fprintf(stderr,"DEBUG [%d] %d -> %d\n",ip,it,res->timer_map_per_rank[c]);
printf("DEBUG [%d] %d -> %d\n",ip,it,res->timer_map_per_rank[c]);
c++;
}
}
......@@ -1140,8 +1143,10 @@ sct_reduction_type *sct_reduction_new(int context_choice, int proc_choice, int t
res->m_thread_num = m_num;
res->r_thread_num = red_thread_num;
//printf("sct_reduction_new: proc_num=%d, red_proc_num=%d, p_num=%d, m_num=%d, red_thread_num=%d\n",
// proc_num, red_proc_num, p_num, m_num, red_thread_num );
if (my_debug)
printf("sct_reduction_new: proc_num=%d, red_proc_num=%d, p_num=%d, m_num=%d, red_thread_num=%d\n",
proc_num, red_proc_num, p_num, m_num, red_thread_num );
const int sp_num = p_num + 1;
int en = sct_get_event_num();
......@@ -1184,12 +1189,14 @@ sct_reduction_type *sct_reduction_new(int context_choice, int proc_choice, int t
sct_stats_type (*red_local)[red_thread_num] = (sct_stats_type (*)[red_thread_num]) red_local_mem;
#ifdef _OPENMP
get_sp(con, red_proc_num, sp_num, sp_stats_mem);
get_sp(con, proc_choice, sp_num, sp_stats_mem);
if (my_debug) {
for (int it = 0; it < timer_num; it++)
for (int tid = 0; tid < sp_num; tid++)
printf("debug I: sp_stats[%d][%d] = %d , %.2e , %d\n",it,tid,sp_stats[it][tid].cnum,sp_stats[it][tid].tsum,sp_stats[it][tid].active_under);
printf("debug I: sp_stats[%d][%d] = %d , %.2e , %d\n", it, tid,
sp_stats[it][tid].cnum, sp_stats[it][tid].tsum,
sp_stats[it][tid].active_under);
}
merge_sp(sp_num, m_num, sp_merging, sp_stats_mem, m_stats_mem);
......@@ -1197,7 +1204,9 @@ sct_reduction_type *sct_reduction_new(int context_choice, int proc_choice, int t
if (my_debug) {
for (int it = 0; it < timer_num; it++)
for (int tid = 0; tid < m_num; tid++)
printf("debug II: m_stats[%d][%d] = %d , %.2e , %d\n",it,tid,m_stats[it][tid].cnum,m_stats[it][tid].tsum,m_stats[it][tid].active_under);
printf("debug II: m_stats[%d][%d] = %d , %.2e , %d\n", it, tid,
m_stats[it][tid].cnum, m_stats[it][tid].tsum,
m_stats[it][tid].active_under);
}
for (int it = 0; it<timer_num; it++) {
......@@ -1212,12 +1221,14 @@ sct_reduction_type *sct_reduction_new(int context_choice, int proc_choice, int t
if (my_debug) {
for (int it = 0; it < timer_num; it++)
for (int tid = 0; tid < red_thread_num; tid++)
printf("debug III: red_local[%d][%d] = %d , %.2e , %d\n",it,tid,red_local[it][tid].cnum,red_local[it][tid].tsum,red_local[it][tid].active_under);
printf("debug III: red_local[%d][%d] = %d , %.2e , %d\n", it, tid,
red_local[it][tid].cnum, red_local[it][tid].tsum,
red_local[it][tid].active_under);
}
#else
if (sp_num != 1 || red_thread_num != 1) sct_abort("sct_reduction_new: internal error", __FILE__, __LINE__);
get_sp(con, red_proc_num, red_thread_num, red_local_mem);
get_sp(con, proc_choice, red_thread_num, red_local_mem);
if (my_debug) {
int tnum;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment