xt_xmap_intersection.c 39.6 KB
Newer Older
1 2 3
/**
 * @file xt_xmap_intersection.c
 *
Thomas Jahns's avatar
Thomas Jahns committed
4 5
 * @copyright Copyright  (C)  2016 Jörg Behrens <behrens@dkrz.de>
 *                                 Moritz Hanke <hanke@dkrz.de>
6 7
 *                                 Thomas Jahns <jahns@dkrz.de>
 *
Thomas Jahns's avatar
Thomas Jahns committed
8 9
 * @author Jörg Behrens <behrens@dkrz.de>
 *         Moritz Hanke <hanke@dkrz.de>
10 11 12 13
 *         Thomas Jahns <jahns@dkrz.de>
 */
/*
 * Keywords:
Thomas Jahns's avatar
Thomas Jahns committed
14 15
 * Maintainer: Jörg Behrens <behrens@dkrz.de>
 *             Moritz Hanke <hanke@dkrz.de>
16
 *             Thomas Jahns <jahns@dkrz.de>
Moritz Hanke's avatar
Moritz Hanke committed
17
 * URL: https://doc.redmine.dkrz.de/yaxt/html/
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are  permitted provided that the following conditions are
 * met:
 *
 * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * Neither the name of the DKRZ GmbH nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
Thomas Jahns's avatar
Thomas Jahns committed
46 47 48
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
49

50 51 52 53 54 55
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <limits.h>

56
#include <mpi.h>
57

58
#include "xt/xt_idxlist.h"
59
#include "xt/xt_idxvec.h"
60
#include "xt/xt_xmap.h"
61
#include "xt_xmap_internal.h"
62
#include "xt/xt_mpi.h"
Thomas Jahns's avatar
Thomas Jahns committed
63
#include "xt_mpi_internal.h"
64 65
#include "core/core.h"
#include "core/ppm_xfuncs.h"
66
#include "xt/xt_xmap_intersection.h"
67
#include "xt_xmap_intersection_common.h"
68
#include "ensure_array_size.h"
69
#include "xt_arithmetic_util.h"
70
#include "xt/quicksort.h"
71 72 73 74

static MPI_Comm     xmap_intersection_get_communicator(Xt_xmap xmap);
static int          xmap_intersection_get_num_destinations(Xt_xmap xmap);
static int          xmap_intersection_get_num_sources(Xt_xmap xmap);
75 76 77 78
static void
xmap_intersection_get_destination_ranks(Xt_xmap xmap, int * ranks);
static void
xmap_intersection_get_source_ranks(Xt_xmap xmap, int * ranks);
79 80
static Xt_xmap_iter xmap_intersection_get_in_iterator(Xt_xmap xmap);
static Xt_xmap_iter xmap_intersection_get_out_iterator(Xt_xmap xmap);
Thomas Jahns's avatar
Thomas Jahns committed
81
static Xt_xmap      xmap_intersection_copy(Xt_xmap xmap);
82
static void         xmap_intersection_delete(Xt_xmap xmap);
83
static int          xmap_intersection_iterator_next(Xt_xmap_iter iter);
84
static int          xmap_intersection_iterator_get_rank(Xt_xmap_iter iter);
85 86
static int const *
xmap_intersection_iterator_get_transfer_pos(Xt_xmap_iter iter);
87
static int
88
xmap_intersection_iterator_get_num_transfer_pos(Xt_xmap_iter iter);
89 90 91 92
static const struct Xt_pos_ext *
xmap_intersection_iterator_get_transfer_pos_ext(Xt_xmap_iter iter);
static int
xmap_intersection_iterator_get_num_transfer_pos_ext(Xt_xmap_iter iter);
93 94 95
static void         xmap_intersection_iterator_delete(Xt_xmap_iter iter);
static int          xmap_intersection_get_max_src_pos(Xt_xmap xmap);
static int          xmap_intersection_get_max_dst_pos(Xt_xmap xmap);
96 97
static Xt_xmap
xmap_intersection_reorder(Xt_xmap xmap, enum xt_reorder_type type);
98 99 100 101
static Xt_xmap
xmap_intersection_update_positions(Xt_xmap xmap,
                                   const int * src_positions,
                                   const int * dst_positions);
102 103 104 105
static Xt_xmap
xmap_intersection_spread(Xt_xmap xmap, int num_repetitions,
                         const int src_displacements[num_repetitions],
                         const int dst_displacements[num_repetitions]);
106 107


108
static const struct Xt_xmap_iter_vtable
109
xmap_iterator_intersection_vtable = {
110
  .next                 = xmap_intersection_iterator_next,
111 112 113
  .get_rank             = xmap_intersection_iterator_get_rank,
  .get_transfer_pos     = xmap_intersection_iterator_get_transfer_pos,
  .get_num_transfer_pos = xmap_intersection_iterator_get_num_transfer_pos,
114 115 116
  .get_transfer_pos_ext = xmap_intersection_iterator_get_transfer_pos_ext,
  .get_num_transfer_pos_ext
  = xmap_intersection_iterator_get_num_transfer_pos_ext,
117
  .delete               = xmap_intersection_iterator_delete};
118

119
typedef struct Xt_xmap_iter_intersection_ *Xt_xmap_iter_intersection;
120

121 122 123
struct Xt_xmap_iter_intersection_ {

  const struct Xt_xmap_iter_vtable * vtable;
124 125 126 127 128

  struct exchange_data * msg;
  int msgs_left;
};

Thomas Jahns's avatar
Thomas Jahns committed
129 130 131 132 133 134
static inline Xt_xmap_iter_intersection
xmii(void *iter)
{
  return (Xt_xmap_iter_intersection)iter;
}

135

136
static const struct Xt_xmap_vtable xmap_intersection_vtable = {
137 138 139 140 141 142 143
        .get_communicator      = xmap_intersection_get_communicator,
        .get_num_destinations  = xmap_intersection_get_num_destinations,
        .get_num_sources       = xmap_intersection_get_num_sources,
        .get_destination_ranks = xmap_intersection_get_destination_ranks,
        .get_source_ranks      = xmap_intersection_get_source_ranks,
        .get_out_iterator      = xmap_intersection_get_out_iterator,
        .get_in_iterator       = xmap_intersection_get_in_iterator,
Thomas Jahns's avatar
Thomas Jahns committed
144
        .copy                  = xmap_intersection_copy,
145 146
        .delete                = xmap_intersection_delete,
        .get_max_src_pos       = xmap_intersection_get_max_src_pos,
147
        .get_max_dst_pos       = xmap_intersection_get_max_dst_pos,
148
        .reorder               = xmap_intersection_reorder,
149 150
        .update_pos            = xmap_intersection_update_positions,
        .spread                = xmap_intersection_spread};
151 152 153

struct exchange_data {
  // list of relative positions in memory to send or receive
154
  int * transfer_pos;
155 156
  struct Xt_pos_ext *transfer_pos_ext_cache;
  int num_transfer_pos, num_transfer_pos_ext;
157 158 159
  int rank;
};

160
struct Xt_xmap_intersection_ {
161 162 163

  const struct Xt_xmap_vtable * vtable;

164
  int n_in, n_out;
165 166 167

  // we need the max position in order to enable quick range-checks
  // for xmap-users like redist
168
  int max_src_pos; // max possible pos over all src transfer_pos (always >= 0)
169
  int max_dst_pos; // same for dst
170
  int tag_offset;  /* add to make tags on same communicator non-overlapping */
171 172

  MPI_Comm comm;
Thomas Jahns's avatar
Thomas Jahns committed
173
  struct exchange_data msg[];
174 175
};

176
typedef struct Xt_xmap_intersection_ *Xt_xmap_intersection;
177

Thomas Jahns's avatar
Thomas Jahns committed
178 179 180 181 182 183
static inline Xt_xmap_intersection
xmi(void *xmap)
{
  return (Xt_xmap_intersection)xmap;
}

184
static MPI_Comm xmap_intersection_get_communicator(Xt_xmap xmap) {
185

Thomas Jahns's avatar
Thomas Jahns committed
186
  Xt_xmap_intersection xmap_intersection = xmi(xmap);
187 188 189 190 191 192

  return xmap_intersection->comm;
}

static int xmap_intersection_get_num_destinations(Xt_xmap xmap) {

Thomas Jahns's avatar
Thomas Jahns committed
193
  Xt_xmap_intersection xmap_intersection = xmi(xmap);
194

195
  // the number of destinations equals the number of source messages
196
  return xmap_intersection->n_out;
197 198 199 200
}

static int xmap_intersection_get_num_sources(Xt_xmap xmap) {

Thomas Jahns's avatar
Thomas Jahns committed
201
  Xt_xmap_intersection xmap_intersection = xmi(xmap);
202

203
  // the number of sources equals the number of destination messages
204
  return xmap_intersection->n_in;
205 206 207 208
}

static void xmap_intersection_get_destination_ranks(Xt_xmap xmap, int * ranks) {

Thomas Jahns's avatar
Thomas Jahns committed
209
  Xt_xmap_intersection xmap_intersection = xmi(xmap);
Thomas Jahns's avatar
Thomas Jahns committed
210 211
  struct exchange_data *restrict out_msg
    = xmap_intersection->msg + xmap_intersection->n_in;
212
  for (int i = 0; i < xmap_intersection->n_out; ++i)
Thomas Jahns's avatar
Thomas Jahns committed
213
    ranks[i] = out_msg[i].rank;
214 215 216 217
}

static void xmap_intersection_get_source_ranks(Xt_xmap xmap, int * ranks) {

Thomas Jahns's avatar
Thomas Jahns committed
218
  Xt_xmap_intersection xmap_intersection = xmi(xmap);
Thomas Jahns's avatar
Thomas Jahns committed
219
  struct exchange_data *restrict in_msg = xmap_intersection->msg;
220
  for (int i = 0; i < xmap_intersection->n_in; ++i)
Thomas Jahns's avatar
Thomas Jahns committed
221
    ranks[i] = in_msg[i].rank;
222 223
}

224 225 226 227
enum {
  bitsPerCoverageElement = sizeof (unsigned long) * CHAR_BIT,
};

228 229 230 231 232 233
struct tpd_result {
  Xt_int *indices_to_remove;
  int resCount;
  bool all_dst_covered;
};

234
/* compute list positions for recv direction */
235
static struct tpd_result
Thomas Jahns's avatar
Thomas Jahns committed
236 237 238 239
generate_dir_transfer_pos_dst(
  int num_intersections,
  const struct Xt_com_list intersections[num_intersections],
  Xt_idxlist mypart_idxlist,
Thomas Jahns's avatar
Thomas Jahns committed
240
  struct exchange_data *restrict resSets,
Thomas Jahns's avatar
Thomas Jahns committed
241
  int *restrict num_indices_to_remove_per_intersection)
Thomas Jahns's avatar
Thomas Jahns committed
242
{
243 244 245 246 247 248 249 250 251
  int mypart_num_indices = xt_idxlist_get_num_indices(mypart_idxlist);
  size_t coverage_size = (size_t)mypart_num_indices;
  coverage_size = (coverage_size + bitsPerCoverageElement - 1)
    /bitsPerCoverageElement;
  unsigned long *restrict coverage = xcalloc(coverage_size, sizeof(*coverage));
  /* set uncovered top-most bits to ease later comparison */
  if (mypart_num_indices%bitsPerCoverageElement)
    coverage[coverage_size-1]
      = ~((1UL << (mypart_num_indices%bitsPerCoverageElement)) - 1UL);
252

253
  int new_num_intersections = 0;
Thomas Jahns's avatar
Thomas Jahns committed
254 255
  size_t total_num_indices_to_remove = 0;
  size_t curr_indices_to_remove_size = 0;
256
  Xt_int *restrict indices_to_remove = NULL;
257
  int *restrict intersection_pos = NULL;
258

259
  for (int i = 0; i < num_intersections; ++i) {
260

261
    const Xt_int *restrict intersection_idxvec
262
      = xt_idxlist_get_indices_const(intersections[i].list);
263
    int max_intersection_size
264
      = xt_idxlist_get_num_indices(intersections[i].list);
265 266 267
    intersection_pos
      = xrealloc(intersection_pos,
                 (size_t)max_intersection_size * sizeof(*intersection_pos));
268

269 270 271 272 273 274
#ifndef NDEBUG
    int retval =
#endif
      xt_idxlist_get_positions_of_indices(
        mypart_idxlist, intersection_idxvec, max_intersection_size,
        intersection_pos, 1);
Thomas Jahns's avatar
Thomas Jahns committed
275
    assert(retval == 0);
276

Thomas Jahns's avatar
Thomas Jahns committed
277 278
    // we have to enforce single_match_only not only within a single
    // intersection, but also between all intersections
279

280 281 282
    int intersection_size = 0;
    int num_indices_to_remove_isect = 0;

Thomas Jahns's avatar
Thomas Jahns committed
283
    /* at most max_intersection_size many indices need to be removed */
284
    ENSURE_ARRAY_SIZE(indices_to_remove, curr_indices_to_remove_size,
285 286 287 288 289 290 291 292 293 294 295 296 297
                      total_num_indices_to_remove
                      + (size_t)max_intersection_size);

    for (int j = 0; j < max_intersection_size; ++j) {

      int pos = intersection_pos[j];
      /* the predicate effectively conditionalizes adding of either
       * the position to intersection_pos
       *   if the current value was NOT already in another intersection
       * or
       * the index to indices_to_remove_
       *   if the current value was already in another intersection
       */
298
      unsigned long mask = 1UL << (pos % bitsPerCoverageElement);
299
      int is_duplicate = (coverage[pos/bitsPerCoverageElement] & mask) != 0UL;
300
      intersection_pos[intersection_size] = pos;
301 302
      indices_to_remove[total_num_indices_to_remove
                        + (size_t)num_indices_to_remove_isect]
303
        = intersection_idxvec[j];
304 305
      intersection_size += is_duplicate ^ 1;
      num_indices_to_remove_isect += is_duplicate;
306
      coverage[pos/bitsPerCoverageElement] |= mask;
307 308
    }

309 310
    total_num_indices_to_remove += (size_t)num_indices_to_remove_isect;
    num_indices_to_remove_per_intersection[i] = num_indices_to_remove_isect;
311 312

    if (intersection_size > 0) {
Thomas Jahns's avatar
Thomas Jahns committed
313 314 315 316
      resSets[new_num_intersections].transfer_pos = intersection_pos;
      resSets[new_num_intersections].num_transfer_pos = intersection_size;
      resSets[new_num_intersections].transfer_pos_ext_cache = NULL;
      resSets[new_num_intersections].num_transfer_pos_ext
317
        = (int)count_pos_ext((size_t)intersection_size, intersection_pos);
Thomas Jahns's avatar
Thomas Jahns committed
318
      resSets[new_num_intersections].rank = intersections[i].rank;
319
      new_num_intersections++;
320
      intersection_pos = NULL;
321
    }
322
  }
323
  free(intersection_pos);
324

325 326 327
  indices_to_remove
    = xrealloc(indices_to_remove, total_num_indices_to_remove
               * sizeof (*indices_to_remove));
328

329 330 331 332 333 334
  // check resulting bit map
  unsigned long all_bits_set = ~0UL;
  for (size_t i = 0; i < coverage_size; ++i)
    all_bits_set &= coverage[i];

  free(coverage);
335 336 337 338
  return (struct tpd_result){
    .indices_to_remove = indices_to_remove,
    .resCount = new_num_intersections,
    .all_dst_covered = all_bits_set == ~0UL };
339 340
}

341 342 343

struct pos_count_max {
  int count, max_pos;
344 345
};

346
/* compute list positions for send direction */
347
static struct pos_count_max
348
generate_dir_transfer_pos_src(int num_intersections,
349 350 351
                              const struct Xt_com_list
                              intersections[num_intersections],
                              Xt_idxlist mypart_idxlist,
Thomas Jahns's avatar
Thomas Jahns committed
352
                              struct exchange_data *restrict resSets,
Thomas Jahns's avatar
Thomas Jahns committed
353 354
                              const Xt_int *indices_to_remove,
                              const int *num_indices_to_remove_per_intersection)
355
{
356 357
  int new_num_intersections = 0;
  int offset = 0;
358 359

  Xt_int * new_intersection_idxvec = NULL;
Thomas Jahns's avatar
Thomas Jahns committed
360
  size_t curr_new_intersection_idxvec_size = 0;
361
  int *restrict intersection_pos = NULL;
362
  int max_pos_ = -1;
363

364
  for (int i = 0; i < num_intersections; ++i) {
365

366
    const Xt_int *restrict intersection_idxvec
367
      = xt_idxlist_get_indices_const(intersections[i].list);
368 369
    int intersection_size
      = xt_idxlist_get_num_indices(intersections[i].list);
370 371 372
    intersection_pos = xrealloc(intersection_pos,
                                (size_t)intersection_size
                                * sizeof(*intersection_pos));
373

374 375
    int num_indices_to_remove = num_indices_to_remove_per_intersection[i];
    if (num_indices_to_remove > 0) {
376

Thomas Jahns's avatar
Thomas Jahns committed
377 378
      ENSURE_ARRAY_SIZE(
        new_intersection_idxvec, curr_new_intersection_idxvec_size,
379
        intersection_size - num_indices_to_remove + 1);
380
      int new_intersection_size = 0;
381

382
      for (int j = 0; j < intersection_size; ++j) {
383

384
        int discard = 0;
385

386
        Xt_int idx = intersection_idxvec[j];
387 388
        /* could be improved with BLOOM-filter if
         * num_indices_to_remove was sufficiently large */
389
        for (int k = 0; k < num_indices_to_remove; ++k)
390
          discard |= (idx == indices_to_remove[offset + k]);
391

392
        new_intersection_idxvec[new_intersection_size] = idx;
393
        new_intersection_size += !discard;
394 395 396 397
      }

      intersection_idxvec = new_intersection_idxvec;
      intersection_size = new_intersection_size;
398
      offset = offset + num_indices_to_remove;
399 400
    }

401 402 403 404 405 406
#ifndef NDEBUG
    int retval =
#endif
      xt_idxlist_get_positions_of_indices(
        mypart_idxlist, intersection_idxvec, intersection_size,
        intersection_pos, 0);
Thomas Jahns's avatar
Thomas Jahns committed
407
    assert(retval == 0);
408 409

    if (intersection_size > 0) {
Thomas Jahns's avatar
Thomas Jahns committed
410 411
      resSets[new_num_intersections].transfer_pos = intersection_pos;
      resSets[new_num_intersections].num_transfer_pos = intersection_size;
412 413
      for (int j = 0; j < intersection_size; ++j)
        if (intersection_pos[j] > max_pos_) max_pos_ = intersection_pos[j];
Thomas Jahns's avatar
Thomas Jahns committed
414 415
      resSets[new_num_intersections].transfer_pos_ext_cache = NULL;
      resSets[new_num_intersections].num_transfer_pos_ext
416
        = (int)count_pos_ext((size_t)intersection_size, intersection_pos);
Thomas Jahns's avatar
Thomas Jahns committed
417
      resSets[new_num_intersections].rank = intersections[i].rank;
418
      new_num_intersections++;
419
      intersection_pos = NULL;
420 421 422 423
    }
  }

  free(new_intersection_idxvec);
Thomas Jahns's avatar
Thomas Jahns committed
424
  free(intersection_pos);
425

426 427
  return (struct pos_count_max){ .max_pos = max_pos_,
      .count = new_num_intersections };
428 429
}

430
static Xt_int *
431
exchange_points_to_remove(int num_src_intersections,
Thomas Jahns's avatar
Thomas Jahns committed
432 433
                          const struct Xt_com_list
                          src_com[num_src_intersections],
434
                          int num_dst_intersections,
Thomas Jahns's avatar
Thomas Jahns committed
435 436
                          const struct Xt_com_list
                          dst_com[num_dst_intersections],
437
                          int *restrict num_src_indices_to_remove_per_intersection,
Thomas Jahns's avatar
Thomas Jahns committed
438
                          Xt_int *dst_indices_to_remove,
Thomas Jahns's avatar
Thomas Jahns committed
439 440
                          const int *restrict
                          num_dst_indices_to_remove_per_intersection,
441
                          int tag_offset,
442 443
                          MPI_Comm comm) {

Thomas Jahns's avatar
Thomas Jahns committed
444 445 446
  MPI_Request * requests
    = xmalloc((size_t)(num_src_intersections + 2 * num_dst_intersections) *
              sizeof(*requests));
Thomas Jahns's avatar
Thomas Jahns committed
447 448 449
  MPI_Request *send_header_requests = requests,
    *recv_requests = requests + num_dst_intersections,
    *send_data_requests = recv_requests + num_src_intersections;
450 451

  // set up receives for indices that need to be removed from the send messages
452
  for (int i = 0; i < num_src_intersections; ++i)
453 454 455 456 457
    xt_mpi_call(MPI_Irecv(
                  num_src_indices_to_remove_per_intersection + i, 1, MPI_INT,
                  src_com[i].rank,
                  tag_offset + xt_mpi_tag_xmap_intersection_header_exchange,
                  comm, recv_requests+i), comm);
458 459 460

  // send indices that need to be removed on the target side due to duplicated
  // receives
461
  int offset = 0;
462
  unsigned num_nonempty_dst_intersections = 0;
463
  for (int i = 0; i < num_dst_intersections; ++i) {
464
    xt_mpi_call(MPI_Isend(
Thomas Jahns's avatar
Thomas Jahns committed
465 466
                  CAST_MPI_SEND_BUF(
                    num_dst_indices_to_remove_per_intersection + i), 1, MPI_INT,
467 468 469
                  dst_com[i].rank,
                  tag_offset + xt_mpi_tag_xmap_intersection_header_exchange,
                  comm, send_header_requests + i), comm);
470 471 472

    if (num_dst_indices_to_remove_per_intersection[i] > 0) {

473 474 475 476 477 478
      xt_mpi_call(MPI_Isend(
                    dst_indices_to_remove + offset,
                    num_dst_indices_to_remove_per_intersection[i], Xt_int_dt,
                    dst_com[i].rank,
                    tag_offset + xt_mpi_tag_xmap_intersection_data_exchange,
                    comm, send_data_requests + num_nonempty_dst_intersections),
479
                  comm);
Thomas Jahns's avatar
Thomas Jahns committed
480
      offset += num_dst_indices_to_remove_per_intersection[i];
481
      ++num_nonempty_dst_intersections;
482 483 484 485
    }
  }

  // wait for the receiving of headers to complete
486
  xt_mpi_call(MPI_Waitall(num_src_intersections + num_dst_intersections,
487
                          send_header_requests, MPI_STATUSES_IGNORE), comm);
488

Thomas Jahns's avatar
Thomas Jahns committed
489
  size_t total_num_src_indices_to_recv = 0;
490

491
  for (int i = 0; i < num_src_intersections; ++i)
Thomas Jahns's avatar
Thomas Jahns committed
492 493
    total_num_src_indices_to_recv
      += (size_t)num_src_indices_to_remove_per_intersection[i];
494

495
  unsigned num_nonempty_src_intersections = 0;
496
  Xt_int *src_indices_to_remove;
497 498
  if (total_num_src_indices_to_recv > 0) {

499 500
    src_indices_to_remove = xmalloc(total_num_src_indices_to_recv
                                    * sizeof(*src_indices_to_remove));
501 502 503

    // set up receive for indices that need to be removed
    offset = 0;
504
    for (int i = 0; i < num_src_intersections; ++i)
505
      if (num_src_indices_to_remove_per_intersection[i] > 0) {
506
        ++num_nonempty_src_intersections;
507
        xt_mpi_call(MPI_Irecv(
508
                      src_indices_to_remove + offset,
509 510 511 512
                      num_src_indices_to_remove_per_intersection[i],
                      Xt_int_dt, src_com[i].rank,
                      tag_offset + xt_mpi_tag_xmap_intersection_data_exchange,
                      comm,
513 514
                      send_data_requests - num_nonempty_src_intersections),
                    comm);
515

516
        offset += num_src_indices_to_remove_per_intersection[i];
517
      }
518 519

  } else {
520
    src_indices_to_remove = NULL;
521 522 523
  }

  // wait until all communication is completed
524 525
  xt_mpi_call(MPI_Waitall((int)num_nonempty_src_intersections
                          + (int)num_nonempty_dst_intersections,
526 527
                          send_data_requests-num_nonempty_src_intersections,
                          MPI_STATUSES_IGNORE), comm);
528
  free(requests);
529
  return src_indices_to_remove;
530 531
}

532
static int
533
generate_transfer_pos(struct Xt_xmap_intersection_ *xmap,
534
                      int num_src_intersections,
Thomas Jahns's avatar
Thomas Jahns committed
535
                      const struct Xt_com_list src_com[num_src_intersections],
536
                      int num_dst_intersections,
Thomas Jahns's avatar
Thomas Jahns committed
537
                      const struct Xt_com_list dst_com[num_dst_intersections],
538
                      Xt_idxlist src_idxlist_local,
539 540 541
                      Xt_idxlist dst_idxlist_local,
                      MPI_Comm comm) {

542 543 544 545 546 547 548 549 550 551 552 553 554
  int *num_src_indices_to_remove_per_intersection =
    xmalloc(((size_t)num_src_intersections + (size_t)num_dst_intersections)
            * sizeof(int)),
    *num_dst_indices_to_remove_per_intersection =
    num_src_indices_to_remove_per_intersection + num_src_intersections;

  struct tpd_result tpdr
    = generate_dir_transfer_pos_dst(
      num_dst_intersections, dst_com, dst_idxlist_local, xmap->msg,
      num_dst_indices_to_remove_per_intersection);
  int all_dst_covered = tpdr.all_dst_covered;
  xmap->n_in = tpdr.resCount;
  Xt_int *dst_indices_to_remove = tpdr.indices_to_remove;
555
  // exchange the points that need to be removed
556 557 558 559 560 561
  Xt_int *src_indices_to_remove
    = exchange_points_to_remove(
      num_src_intersections, src_com, num_dst_intersections, dst_com,
      num_src_indices_to_remove_per_intersection,
      dst_indices_to_remove, num_dst_indices_to_remove_per_intersection,
      xmap->tag_offset, comm);
562 563

  free(dst_indices_to_remove);
564 565 566
  num_src_indices_to_remove_per_intersection
    = xrealloc(num_src_indices_to_remove_per_intersection,
               (size_t)num_src_intersections * sizeof(int));
567

568
  struct pos_count_max tpsr
569 570 571 572
    = generate_dir_transfer_pos_src(
      num_src_intersections, src_com, src_idxlist_local, xmap->msg + xmap->n_in,
      src_indices_to_remove, num_src_indices_to_remove_per_intersection);
  xmap->max_src_pos = tpsr.max_pos;
573
  xmap->n_out = tpsr.count;
574 575 576

  free(src_indices_to_remove);
  free(num_src_indices_to_remove_per_intersection);
577
  return all_dst_covered;
Moritz Hanke's avatar
Moritz Hanke committed
578 579
}

580
Xt_xmap
581
xt_xmap_intersection_new(int num_src_intersections,
Thomas Jahns's avatar
Thomas Jahns committed
582 583
                         const struct Xt_com_list
                         src_com[num_src_intersections],
584
                         int num_dst_intersections,
Thomas Jahns's avatar
Thomas Jahns committed
585 586
                         const struct Xt_com_list
                         dst_com[num_dst_intersections],
587 588
                         Xt_idxlist src_idxlist, Xt_idxlist dst_idxlist,
                         MPI_Comm comm) {
589

590 591 592
  // ensure that yaxt is initialized
  assert(xt_initialized());

Thomas Jahns's avatar
Thomas Jahns committed
593 594 595 596
  size_t num_isect
    = (size_t)num_dst_intersections + (size_t)num_src_intersections;
  Xt_xmap_intersection xmap
    = xmalloc(sizeof (*xmap) + num_isect * sizeof (struct exchange_data));
597 598 599

  xmap->vtable = &xmap_intersection_vtable;

600
  xmap->comm = comm = xt_mpi_comm_smart_dup(comm, &xmap->tag_offset);
601 602

  // generate exchange lists
603 604 605
  if (!generate_transfer_pos(xmap,
                             num_src_intersections, src_com,
                             num_dst_intersections, dst_com,
606 607 608
                             src_idxlist, dst_idxlist, comm)) {

    int num_dst_indices = xt_idxlist_get_num_indices(dst_idxlist);
609 610 611 612 613
    const Xt_int *dst_indices = xt_idxlist_get_indices_const(dst_idxlist);
    int * found_index_mask = xcalloc((size_t)num_dst_indices,
                                     sizeof(*found_index_mask));
    int * index_positions = xmalloc((size_t)num_dst_indices
                                    * sizeof(*index_positions));
614

615
    for (size_t i = 0; i < (size_t)num_dst_intersections; ++i) {
616 617
      xt_idxlist_get_positions_of_indices(dst_com[i].list, dst_indices,
                                          num_dst_indices, index_positions, 0);
618
      for (size_t j = 0; j < (size_t)num_dst_indices; ++j)
619 620 621 622 623 624
        found_index_mask[j] |= index_positions[j] != -1;
    }

    int first_missing_pos = 0;
    while ((first_missing_pos < (num_dst_indices - 1)) &&
           (found_index_mask[first_missing_pos])) ++first_missing_pos;
625 626 627
    free(found_index_mask);
    free(index_positions);
    xmap_intersection_delete((Xt_xmap)xmap);
628
    print_miss_msg(dst_idxlist, first_missing_pos, comm, __FILE__, __LINE__);
629
  }
630

Thomas Jahns's avatar
Thomas Jahns committed
631 632 633 634 635 636
  int n_in = xmap->n_in, n_out = xmap->n_out;
  size_t new_num_isect = (size_t)n_in + (size_t)n_out;
  if (new_num_isect != num_isect)
    xmap = xrealloc(xmap, sizeof (*xmap) + (new_num_isect
                                            * sizeof(struct exchange_data)));

637
  xmap->max_dst_pos = xt_idxlist_get_num_indices(dst_idxlist) - 1;
638 639 640 641 642

  return (Xt_xmap)xmap;
}

static int xmap_intersection_get_max_src_pos(Xt_xmap xmap) {
Thomas Jahns's avatar
Thomas Jahns committed
643
  return xmi(xmap)->max_src_pos;
644 645 646
}

static int xmap_intersection_get_max_dst_pos(Xt_xmap xmap) {
Thomas Jahns's avatar
Thomas Jahns committed
647
  return xmi(xmap)->max_dst_pos;
648 649 650
}


651 652 653
typedef int (*Xt_pos_copy)(size_t num_pos,
                           int *pos, const int *orig_pos,
                           void *state);
654

655 656 657
static int
pos_copy_verbatim(size_t num_pos,
                  int *pos, const int *orig_pos, void *state)
658 659 660
{
  (void)state;
  memcpy(pos, orig_pos, sizeof (*pos) * num_pos);
661
  return -1;
662 663
}

664 665 666 667
typedef void (*Xt_pos_ncopy)(size_t num_pos, int *pos, const int *orig_pos,
                             void *state, int num_repetitions,
                             const int displacements[num_repetitions]);

Thomas Jahns's avatar
Thomas Jahns committed
668
static void
Thomas Jahns's avatar
Thomas Jahns committed
669 670
xmap_intersection_msg_copy(size_t nmsg,
                           const struct exchange_data *restrict msg,
Thomas Jahns's avatar
Thomas Jahns committed
671 672
                           int *nmsg_copy,
                           struct exchange_data *restrict msg_copy,
673
                           int *max_pos_, int num_repetitions,
674
                           Xt_pos_copy pos_copy, void *pos_copy_state) {
Thomas Jahns's avatar
Thomas Jahns committed
675
  *nmsg_copy = (int)nmsg;
676
  int max_pos = 0;
Thomas Jahns's avatar
Thomas Jahns committed
677 678
  for (size_t i = 0; i < nmsg; ++i) {
    size_t num_transfer_pos
Thomas Jahns's avatar
Thomas Jahns committed
679
      = (size_t)(msg_copy[i].num_transfer_pos
680
                 = num_repetitions * msg[i].num_transfer_pos);
Thomas Jahns's avatar
Thomas Jahns committed
681 682
    msg_copy[i].rank = msg[i].rank;
    msg_copy[i].transfer_pos_ext_cache = NULL;
Thomas Jahns's avatar
Thomas Jahns committed
683 684
    size_t size_transfer_pos
      = num_transfer_pos * sizeof (*(msg[i].transfer_pos));
Thomas Jahns's avatar
Thomas Jahns committed
685
    msg_copy[i].transfer_pos = xmalloc(size_transfer_pos);
686 687
    int new_max_pos
      = pos_copy(num_transfer_pos,
Thomas Jahns's avatar
Thomas Jahns committed
688
                 msg_copy[i].transfer_pos, msg[i].transfer_pos,
689 690 691
                 pos_copy_state);
    if (new_max_pos > max_pos)
      max_pos = new_max_pos;
692
    if (pos_copy == pos_copy_verbatim)
Thomas Jahns's avatar
Thomas Jahns committed
693
      msg_copy[i].num_transfer_pos_ext = msg[i].num_transfer_pos_ext;
694
    else
Thomas Jahns's avatar
Thomas Jahns committed
695 696
      msg_copy[i].num_transfer_pos_ext =
        (int)(count_pos_ext(num_transfer_pos, msg_copy[i].transfer_pos));
Thomas Jahns's avatar
Thomas Jahns committed
697
  }
698 699
  if (pos_copy != pos_copy_verbatim)
    *max_pos_ = max_pos;
Thomas Jahns's avatar
Thomas Jahns committed
700 701 702
}

static Xt_xmap
703
xmap_intersection_copy_(Xt_xmap xmap,
704
                        int num_repetitions,
705 706
                        Xt_pos_copy pos_copy_in, void *pci_state,
                        Xt_pos_copy pos_copy_out, void *pco_state)
Thomas Jahns's avatar
Thomas Jahns committed
707
{
Thomas Jahns's avatar
Thomas Jahns committed
708 709 710 711 712 713 714
  Xt_xmap_intersection xmap_intersection = xmi(xmap), xmap_intersection_new;
  size_t n_in = (size_t)xmap_intersection->n_in,
    n_out = (size_t)xmap_intersection->n_out,
    num_isect = n_in + n_out;
  xmap_intersection_new
    = xmalloc(sizeof (*xmap_intersection_new)
              + num_isect * sizeof (struct exchange_data));
Thomas Jahns's avatar
Thomas Jahns committed
715
  xmap_intersection_new->vtable = xmap_intersection->vtable;
Thomas Jahns's avatar
Thomas Jahns committed
716 717
  xmap_intersection_new->n_in = (int)n_in;
  xmap_intersection_new->n_out = (int)n_out;
Thomas Jahns's avatar
Thomas Jahns committed
718 719
  xmap_intersection_new->max_src_pos = xmap_intersection->max_src_pos;
  xmap_intersection_new->max_dst_pos = xmap_intersection->max_dst_pos;
Thomas Jahns's avatar
Thomas Jahns committed
720
  xmap_intersection_msg_copy(n_in, xmap_intersection->msg,
Thomas Jahns's avatar
Thomas Jahns committed
721
                             &xmap_intersection_new->n_in,
Thomas Jahns's avatar
Thomas Jahns committed
722
                             xmap_intersection_new->msg,
723
                             &xmap_intersection_new->max_dst_pos,
724
                             num_repetitions,
725
                             pos_copy_in, pci_state);
Thomas Jahns's avatar
Thomas Jahns committed
726
  xmap_intersection_msg_copy(n_out, xmap_intersection->msg + n_in,
Thomas Jahns's avatar
Thomas Jahns committed
727
                             &xmap_intersection_new->n_out,
Thomas Jahns's avatar
Thomas Jahns committed
728
                             xmap_intersection_new->msg+n_in,
729
                             &xmap_intersection_new->max_src_pos,
730
                             num_repetitions,
731
                             pos_copy_out, pco_state);
Thomas Jahns's avatar
Thomas Jahns committed
732 733 734 735 736 737
  xmap_intersection_new->comm
    = xt_mpi_comm_smart_dup(xmap_intersection->comm,
                            &xmap_intersection_new->tag_offset);
  return (Xt_xmap)xmap_intersection_new;
}

738 739 740
static Xt_xmap
xmap_intersection_copy(Xt_xmap xmap)
{
741
  return xmap_intersection_copy_(xmap, 1,
742 743 744 745
                                 pos_copy_verbatim, NULL,
                                 pos_copy_verbatim, NULL);
}

746 747
static void
xmap_intersection_msg_delete(int nmsg, struct exchange_data *msg) {
Thomas Jahns's avatar
Thomas Jahns committed
748
  for (int i = 0; i < nmsg; ++i) {
749 750 751 752 753
    free(msg[i].transfer_pos_ext_cache);
    free(msg[i].transfer_pos);
  }
}

754 755
static void xmap_intersection_delete(Xt_xmap xmap) {

Thomas Jahns's avatar
Thomas Jahns committed
756
  Xt_xmap_intersection xmap_intersection = xmi(xmap);
757

Thomas Jahns's avatar
Thomas Jahns committed
758 759
  int num_isect = xmap_intersection->n_in + xmap_intersection->n_out;
  xmap_intersection_msg_delete(num_isect, xmap_intersection->msg);
760 761
  xt_mpi_comm_smart_dedup(&xmap_intersection->comm,
                          xmap_intersection->tag_offset);
762 763 764
  free(xmap_intersection);
}

765
static Xt_xmap_iter xmap_intersection_get_in_iterator(Xt_xmap xmap) {
766

Thomas Jahns's avatar
Thomas Jahns committed
767
  Xt_xmap_intersection xmap_intersection = xmi(xmap);
768

769
  if (xmap_intersection->n_in == 0)
770 771
    return NULL;

772
  Xt_xmap_iter_intersection iter = xmalloc(sizeof (*iter));
773 774

  iter->vtable = &xmap_iterator_intersection_vtable;
Thomas Jahns's avatar
Thomas Jahns committed
775
  iter->msg = xmap_intersection->msg;
776
  iter->msgs_left = xmap_intersection->n_in - 1;
777 778 779 780

  return (Xt_xmap_iter)iter;
}

781
static Xt_xmap_iter xmap_intersection_get_out_iterator(Xt_xmap xmap) {
782

Thomas Jahns's avatar
Thomas Jahns committed
783
  Xt_xmap_intersection xmap_intersection = xmi(xmap);
784

785
  if (xmap_intersection->n_out == 0)
786 787
    return NULL;

788
  Xt_xmap_iter_intersection iter = xmalloc(sizeof (*iter));
789 790

  iter->vtable = &xmap_iterator_intersection_vtable;
Thomas Jahns's avatar
Thomas Jahns committed
791
  iter->msg = xmap_intersection->msg + xmap_intersection->n_in;
792
  iter->msgs_left = xmap_intersection->n_out - 1;
793 794 795 796

  return (Xt_xmap_iter)iter;
}

797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838
struct a2abuf
{
  int *buffer, *counts, *displs;
};

static inline struct a2abuf
setup_buffer(int comm_size, int n, const struct exchange_data *msgs)
{
  size_t buffer_size = 0;
  for (int i = 0; i < n; ++i)
    buffer_size += (size_t)msgs[i].num_transfer_pos;

  int *restrict counts = xmalloc((2 * (size_t)comm_size + buffer_size)
                                 * sizeof(*counts)),
    *restrict displs = counts + comm_size,
    *restrict buffer = counts + 2 * comm_size;
  for (int i = 0; i < comm_size; ++i)
    counts[i] = 0;

  for (int i = 0; i < n; ++i)
    counts[msgs[i].rank] = msgs[i].num_transfer_pos;

  for (int i = 0, accu = 0; i < comm_size; ++i) {
    displs[i] = accu;
    accu += counts[i];
  }
  return (struct a2abuf){ .buffer=buffer, .counts=counts, .displs=displs };
}

static void reorder_transfer_pos(
  int n_out, int n_in, struct exchange_data * out_msg,
  struct exchange_data * in_msg, MPI_Comm comm) {

  int comm_size;
  xt_mpi_call(MPI_Comm_size(comm, &comm_size), comm);

  struct a2abuf
    out = setup_buffer(comm_size, n_out, out_msg),
    in = setup_buffer(comm_size, n_in, in_msg);

  for (int i = 0; i < n_out; ++i) {
    const struct exchange_data * curr_msg = out_msg + i;
Thomas Jahns's avatar
Thomas Jahns committed
839
    int count = curr_msg->num_transfer_pos;
840 841 842
    if (count > 0) {
      // pack local out transfer_pos into out buffer
      memcpy(out.buffer + out.displs[curr_msg->rank],
Thomas Jahns's avatar
Thomas Jahns committed
843
             curr_msg->transfer_pos, (size_t)count * sizeof(*out.buffer));
844
      // sort local out transfer_pos
Thomas Jahns's avatar
Thomas Jahns committed
845
      xt_quicksort_int(curr_msg->transfer_pos, (size_t)count);
846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866