Skip to content
Snippets Groups Projects
Commit 79f5246a authored by Thomas Jahns
Browse files

Add function to make stripes more succinct during copying.

* Instances where stripes can be optimized are pretty much
  guaranteed to coincide with fresh copies.
parent 82de03b3
No related branches found
No related tags found
No related merge requests found
......@@ -62,6 +62,7 @@
#include "xt/xt_idxempty.h"
#include "xt/xt_mpi.h"
#include "xt_idxlist_unpack.h"
#include "xt_stripe_util.h"
#include "core/core.h"
#include "core/ppm_xfuncs.h"
#include "ensure_array_size.h"
......@@ -308,11 +309,9 @@ idxlist_collection_get_index_stripes(Xt_idxlist idxlist,
size_t temp_stripes_array_size = 0;
int num_temp_stripes = 0;
if (collectionlist->num_idxlists > 0) {
xt_idxlist_get_index_stripes(collectionlist->idxlists[0], &temp_stripes,
&num_temp_stripes);
temp_stripes_array_size = (size_t)num_temp_stripes;
}
xt_idxlist_get_index_stripes(collectionlist->idxlists[0], &temp_stripes,
&num_temp_stripes);
temp_stripes_array_size = (size_t)num_temp_stripes;
for (int i = 1; i < collectionlist->num_idxlists; ++i) {
......@@ -325,8 +324,11 @@ idxlist_collection_get_index_stripes(Xt_idxlist idxlist,
ENSURE_ARRAY_SIZE(temp_stripes, temp_stripes_array_size,
num_temp_stripes + curr_num_stripes);
memcpy(temp_stripes + num_temp_stripes, curr_stripes,
(size_t)curr_num_stripes * sizeof(*curr_stripes));
curr_num_stripes
= (int)xt_stripes_merge_copy((size_t)curr_num_stripes,
temp_stripes + num_temp_stripes,
curr_stripes,
num_temp_stripes > 0);
free(curr_stripes);
......
......@@ -306,14 +306,15 @@ xt_idxstripes_new(struct Xt_stripe const * stripes, int num_stripes) {
* sizeof (struct Xt_stripe),
body_size = sizeof (struct Xt_stripe) * (size_t)num_stripes;
Xt_idxstripes idxstripes = xmalloc(header_size + body_size);
idxstripes->num_stripes = num_stripes;
{
struct Xt_stripe *stripes_assign
= (struct Xt_stripe *)(void *)((unsigned char *)idxstripes
+ header_size);
idxstripes->stripes = stripes_assign;
memcpy(stripes_assign, stripes,
(size_t)num_stripes * sizeof(*stripes_assign));
idxstripes->num_stripes
= (int)xt_stripes_merge_copy((size_t)num_stripes,
stripes_assign,
stripes, false);
}
result = idxstripes_aggregate(idxstripes, __func__);
} else
......
......@@ -48,8 +48,12 @@
#include <config.h>
#endif
#include <stdbool.h>
#include <stdlib.h>
#include "xt/xt_core.h"
#include "xt/xt_stripe.h"
#include "xt_stripe_util.h"
#include "core/ppm_xfuncs.h"
#include "instr.h"
......@@ -102,6 +106,48 @@ void xt_convert_indices_to_stripes(const Xt_int *restrict indices,
INSTR_STOP(instr);
}
size_t
xt_stripes_merge_copy(size_t num_stripes,
struct Xt_stripe *stripes_dst,
const struct Xt_stripe *stripes_src,
bool lookback)
{
size_t skip = 1;
if (num_stripes) {
if (lookback) {
Xt_int stride = stripes_src[0].stride,
prev_stride = stripes_dst[-1].stride,
start = stripes_src[0].start,
prev_start = stripes_dst[-1].start;
if (stride == prev_stride
&& start == prev_start + stride * (Xt_int)stripes_dst[-1].nstrides) {
/* merge perfectly aligned stripes */
stripes_dst[-1].nstrides += stripes_src[0].nstrides;
++skip;
goto copy_loop;
}
}
stripes_dst[0] = stripes_src[0];
copy_loop:
if (num_stripes > 1)
for (size_t i = 1; i < num_stripes; ++i) {
Xt_int stride = stripes_src[i].stride,
prev_stride = stripes_dst[i-skip].stride,
start = stripes_src[i].start,
prev_start = stripes_dst[i-skip].start;
if (stride == prev_stride
&& start == prev_start + stride * (Xt_int)stripes_dst[i-skip].nstrides) {
/* merge perfectly aligned stripes */
stripes_dst[i-skip].nstrides += stripes_src[i].nstrides;
++skip;
} else
stripes_dst[i-skip+1] = stripes_src[i];
}
}
return num_stripes - (skip - 1);
}
/*
* Local Variables:
......
......@@ -50,6 +50,9 @@
#include <config.h>
#endif
#include <stdbool.h>
#include <stdlib.h>
#include "xt/xt_stripe.h"
#include "xt_arithmetic_util.h"
......@@ -80,6 +83,25 @@ xt_stripes_overlap(struct Xt_stripe a, struct Xt_stripe b) {
}
/**
 * Copy stripes_src to stripes_dst, fusing trivially adjacent stripes:
 * a stripe is folded into its predecessor in the output when both have
 * the same stride and the stripe starts exactly at
 * predecessor.start + stride * predecessor.nstrides.
 *
 * @param num_stripes number of stripes stored at stripes_src
 * @param stripes_dst target array able to hold at least num_stripes stripes;
 *                    if \a lookback is true, stripes_dst[-1] is read and
 *                    must therefore be a valid, initialized stripe
 * @param stripes_src source array containing \a num_stripes stripes to
 *                    be copied to \a stripes_dst
 * @param lookback if true, inspects also stripes_dst[-1] for possible
 *                 fusion with stripes_src[0]; on fusion,
 *                 stripes_dst[-1].nstrides is increased in place
 * @return number of stripes written to stripes_dst starting at index 0
 *         (at most \a num_stripes); a stripe fused into stripes_dst[-1]
 *         is not counted, so the result can be 0 for non-empty input
 *
 */
size_t
xt_stripes_merge_copy(size_t num_stripes,
struct Xt_stripe *stripes_dst,
const struct Xt_stripe *stripes_src,
bool lookback);
#endif
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment