From 7611d377401467537fa8273bcd0b024df41cca14 Mon Sep 17 00:00:00 2001 From: Thomas Jahns <jahns@dkrz.de> Date: Mon, 22 Aug 2022 14:22:07 +0200 Subject: [PATCH] Add alternative code path for huge buffers. * In practice indexing into RMA buffers may require 64bit addressing, even though individual data sets are still limited to INT_MAX entries. --- config/config.guess | 34 +++++++--- config/ltmain.sh | 25 ++++--- configure | 83 +++++++++++++++++++++++- configure.ac | 16 +++-- src/config.h.in | 9 +++ src/pio_server.c | 154 +++++++++++++++++++++++++++++++------------- 6 files changed, 246 insertions(+), 75 deletions(-) diff --git a/config/config.guess b/config/config.guess index 7f76b6228..1817bdce9 100755 --- a/config/config.guess +++ b/config/config.guess @@ -4,7 +4,7 @@ # shellcheck disable=SC2006,SC2268 # see below for rationale -timestamp='2022-01-09' +timestamp='2022-05-25' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -1151,16 +1151,27 @@ EOF ;; x86_64:Linux:*:*) set_cc_for_build + CPU=$UNAME_MACHINE LIBCABI=$LIBC if test "$CC_FOR_BUILD" != no_compiler_found; then - if (echo '#ifdef __ILP32__'; echo IS_X32; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_X32 >/dev/null - then - LIBCABI=${LIBC}x32 - fi + ABI=64 + sed 's/^ //' << EOF > "$dummy.c" + #ifdef __i386__ + ABI=x86 + #else + #ifdef __ILP32__ + ABI=x32 + #endif + #endif +EOF + cc_set_abi=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^ABI' | sed 's, ,,g'` + eval "$cc_set_abi" + case $ABI in + x86) CPU=i686 ;; + x32) LIBCABI=${LIBC}x32 ;; + esac fi - GUESS=$UNAME_MACHINE-pc-linux-$LIBCABI + GUESS=$CPU-pc-linux-$LIBCABI ;; xtensa*:Linux:*:*) GUESS=$UNAME_MACHINE-unknown-linux-$LIBC @@ -1367,8 +1378,11 @@ EOF BePC:Haiku:*:*) # Haiku running on Intel PC compatible. GUESS=i586-pc-haiku ;; - x86_64:Haiku:*:*) - GUESS=x86_64-unknown-haiku + ppc:Haiku:*:*) # Haiku running on Apple PowerPC + GUESS=powerpc-apple-haiku + ;; + *:Haiku:*:*) # Haiku modern gcc (not bound by BeOS compat) + GUESS=$UNAME_MACHINE-unknown-haiku ;; SX-4:SUPER-UX:*:*) GUESS=sx4-nec-superux$UNAME_RELEASE diff --git a/config/ltmain.sh b/config/ltmain.sh index 9cf2bd7bc..5f5e7a13c 100644 --- a/config/ltmain.sh +++ b/config/ltmain.sh @@ -7646,10 +7646,7 @@ func_mode_link () case $pass in dlopen) libs=$dlfiles ;; dlpreopen) libs=$dlprefiles ;; - link) - libs="$deplibs %DEPLIBS%" - test "X$link_all_deplibs" != Xno && libs="$libs $dependency_libs" - ;; + link) libs="$deplibs %DEPLIBS% $dependency_libs" ;; esac fi if test lib,dlpreopen = "$linkmode,$pass"; then @@ -7983,19 +7980,19 @@ func_mode_link () # It is a libtool convenience library, so add in its objects. func_append convenience " $ladir/$objdir/$old_library" func_append old_convenience " $ladir/$objdir/$old_library" - tmp_libs= - for deplib in $dependency_libs; do - deplibs="$deplib $deplibs" - if $opt_preserve_dup_deps; then - case "$tmp_libs " in - *" $deplib "*) func_append specialdeplibs " $deplib" ;; - esac - fi - func_append tmp_libs " $deplib" - done elif test prog != "$linkmode" && test lib != "$linkmode"; then func_fatal_error "'$lib' is not a convenience library" fi + tmp_libs= + for deplib in $dependency_libs; do + deplibs="$deplib $deplibs" + if $opt_preserve_dup_deps; then + case "$tmp_libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append tmp_libs " $deplib" + done continue fi # $pass = conv diff --git a/configure b/configure index 3750943b1..0edd1c6c3 100755 --- a/configure +++ b/configure @@ -31187,6 +31187,8 @@ as_fn_error $? "required header yaxt.h not found or not compilable See \`config.log' for more details" "$LINENO" 5; } fi + as_fn_append CFLAGS " $YAXT_C_INCLUDE" + LIBS="$YAXT_C_LIB $LIBS" defined_Xt_uid=no ac_fn_c_check_decl "$LINENO" "MPI_UINT64_T" "ac_cv_have_decl_MPI_UINT64_T" "$ac_includes_default #include <mpi.h> @@ -31196,9 +31198,7 @@ if test "x$ac_cv_have_decl_MPI_UINT64_T" = xyes; then : defined_Xt_uid=yes else - as_fn_append CFLAGS " $YAXT_C_INCLUDE" - LIBS="$YAXT_C_LIB $LIBS" - # The cast to long int works around a bug in the HP C Compiler + # The cast to long int works around a bug in the HP C Compiler # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. # This bug is HP SR number 8606223364. @@ -31334,6 +31334,83 @@ as_fn_error $? "no way to communicate Xt_uid found See \`config.log' for more details" "$LINENO" 5; } fi { defined_Xt_uid=; unset defined_Xt_uid;} + ac_fn_c_check_func "$LINENO" "xt_redist_p2p_aext_new" "ac_cv_func_xt_redist_p2p_aext_new" +if test "x$ac_cv_func_xt_redist_p2p_aext_new" = xyes; then : + # The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of MPI_Aint" >&5 +$as_echo_n "checking size of MPI_Aint... " >&6; } +if ${ac_cv_sizeof_MPI_Aint+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (MPI_Aint))" "ac_cv_sizeof_MPI_Aint" "$ac_includes_default +#include <mpi.h> +"; then : + +else + if test "$ac_cv_type_MPI_Aint" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (MPI_Aint) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_MPI_Aint=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_MPI_Aint" >&5 +$as_echo "$ac_cv_sizeof_MPI_Aint" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_MPI_AINT $ac_cv_sizeof_MPI_Aint +_ACEOF + + + # The cast to long int works around a bug in the HP C Compiler +# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects +# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'. +# This bug is HP SR number 8606223364. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of int" >&5 +$as_echo_n "checking size of int... " >&6; } +if ${ac_cv_sizeof_int+:} false; then : + $as_echo_n "(cached) " >&6 +else + if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (int))" "ac_cv_sizeof_int" "$ac_includes_default"; then : + +else + if test "$ac_cv_type_int" = yes; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot compute sizeof (int) +See \`config.log' for more details" "$LINENO" 5; } + else + ac_cv_sizeof_int=0 + fi +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_int" >&5 +$as_echo "$ac_cv_sizeof_int" >&6; } + + + +cat >>confdefs.h <<_ACEOF +#define SIZEOF_INT $ac_cv_sizeof_int +_ACEOF + + + if test $ac_cv_sizeof_MPI_Aint -gt $ac_cv_sizeof_int; then : + +$as_echo "#define USE_XT_REDIST_P2P_AEXT_NEW 1" >>confdefs.h + +fi +fi + CFLAGS=$saved_CFLAGS LIBS=$saved_LIBS diff --git a/configure.ac b/configure.ac index 11a799591..f8d4aa12c 100644 --- a/configure.ac +++ b/configure.ac @@ -397,16 +397,17 @@ dnl whatever pkg-config finds will be used instead. [AC_MSG_FAILURE([cannot link C YAXT programs])],,[$YAXT_C_LIB])], [AC_MSG_FAILURE([required header yaxt.h not found or not compilable])],, [$YAXT_C_INCLUDE],[[]]) -dnl +dnl the following tests can rely on having yaxt.h and C libs available + AS_VAR_APPEND([CFLAGS], [" $YAXT_C_INCLUDE"]) + LIBS="$YAXT_C_LIB $LIBS" +dnl determine how to xfer Xt_uid values by MPI AH_TEMPLATE([YAXT_UID_DT], [Defined to MPI datatype to be used for Xt_uid])dnl defined_Xt_uid=no AC_CHECK_DECL([MPI_UINT64_T], [AC_DEFINE([YAXT_UID_DT], [MPI_UINT64_T]) defined_Xt_uid=yes], - [AS_VAR_APPEND([CFLAGS], [" $YAXT_C_INCLUDE"]) - LIBS="$YAXT_C_LIB $LIBS" - AC_CHECK_SIZEOF([Xt_uid],, + [AC_CHECK_SIZEOF([Xt_uid],, [AC_INCLUDES_DEFAULT @%:@include <yaxt.h>]) AC_CHECK_SIZEOF([unsigned long]) @@ -425,6 +426,13 @@ dnl AS_VAR_IF([defined_Xt_uid], [no], [AC_MSG_FAILURE([no way to communicate Xt_uid found])]) AS_UNSET([defined_Xt_uid]) +dnl determine if xt_redist_p2p_aext_new is available + AC_CHECK_FUNC([xt_redist_p2p_aext_new], + [AC_CHECK_SIZEOF([MPI_Aint],,[AC_INCLUDES_DEFAULT +@%:@include <mpi.h>]) + AC_CHECK_SIZEOF([int]) + AS_IF([test $ac_cv_sizeof_MPI_Aint -gt $ac_cv_sizeof_int], + [AC_DEFINE([USE_XT_REDIST_P2P_AEXT_NEW],[1],[Defined if aext alternatives should be tested])])]) CFLAGS=$saved_CFLAGS LIBS=$saved_LIBS diff --git a/src/config.h.in b/src/config.h.in index 4f24a77de..7807f9508 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -253,6 +253,12 @@ your system. */ #undef PTHREAD_CREATE_JOINABLE +/* The size of `int', as computed by sizeof. */ +#undef SIZEOF_INT + +/* The size of `MPI_Aint', as computed by sizeof. */ +#undef SIZEOF_MPI_AINT + /* The size of `unsigned long', as computed by sizeof. */ #undef SIZEOF_UNSIGNED_LONG @@ -274,6 +280,9 @@ /* User name */ #undef USER_NAME +/* Defined if aext alternatives should be tested */ +#undef USE_XT_REDIST_P2P_AEXT_NEW + /* Version number of package */ #undef VERSION diff --git a/src/pio_server.c b/src/pio_server.c index 4a3a67a9f..4106fb706 100644 --- a/src/pio_server.c +++ b/src/pio_server.c @@ -284,7 +284,7 @@ resizeVarGatherBuf(size_t size, void **buf, size_t *bufSize) #define wHECast(buf) ((struct winHeaderEntry *)(void *)buf) static Xt_xmap -buildVarXmap(struct Xt_offset_ext *restrict partExts, +buildVarXmap(int *restrict partSizes, const struct clientBuf *restrict clientBuf, size_t headerIdx, Xt_idxlist dstList, Xt_idxlist *partDescPreset, @@ -297,8 +297,6 @@ buildVarXmap(struct Xt_offset_ext *restrict partExts, Xt_idxlist *part = partDescPreset ? partDescPreset : Malloc(numClients * sizeof (part[0])); int conversion = (wHECast(clientBuf[0].mem))[headerIdx].id; - size_t elemSize - = conversion == DATA_HEADER_FLOAT ? sizeof (float) : sizeof (double); for (size_t clientIdx = 0; clientIdx < numClients; ++clientIdx) { unsigned char *clientMem = clientBuf[clientIdx].mem; @@ -319,17 +317,7 @@ buildVarXmap(struct Xt_offset_ext *restrict partExts, = xt_idxlist_unpack(clientMem, (int)clientBuf[clientIdx].size, &position, pioInterComm); } - unsigned partSize - = (unsigned)xt_idxlist_get_num_indices(part[clientIdx]); - size_t charOfs = (size_t)((clientMem - + (wHECast(clientMem))[headerIdx].offset) - - clientBuf[0].mem); - xassert(charOfs % elemSize == 0 - && charOfs / elemSize + partSize <= INT_MAX); - int elemOfs = (int)(charOfs / elemSize); - partExts[clientIdx].start = elemOfs; - partExts[clientIdx].size = (int)partSize; - partExts[clientIdx].stride = 1; + partSizes[clientIdx+1] = xt_idxlist_get_num_indices(part[clientIdx]); } Xt_idxlist srcList = xt_idxlist_collection_new(part, (int)numClients); if (!partDescPreset) @@ -818,6 +806,82 @@ buildDecoPresetXmaps(int streamID, struct partDescPreset clientDeco, } } +struct partExtRes +{ + void *partExts; + bool needAExt; +}; + +static struct partExtRes +fillPartExts(const int *partSizes, + const struct clientBuf *restrict clientBuf, + size_t headerIdx) +{ + int conversion = (wHECast(clientBuf[0].mem))[headerIdx].id; + int varID = wHECast(clientBuf[0].mem)[headerIdx].specific.dataRecord.varID; + size_t elemSize + = conversion == DATA_HEADER_FLOAT ? sizeof (float) : sizeof (double); +#ifdef USE_XT_REDIST_P2P_AEXT_NEW + struct Xt_aoffset_ext *partAExts = NULL; + bool needAExt = false; +#endif + size_t numClients = (size_t)numClients_; + struct partExtRes extRes; + struct Xt_offset_ext *partExts = extRes.partExts + = Malloc(numClients * sizeof (partExts[0])); +#ifdef USE_XT_REDIST_P2P_AEXT_NEW + for (size_t clientIdx = 0; clientIdx < numClients; ++clientIdx) + { + unsigned char *clientMem = clientBuf[clientIdx].mem; + xassert((wHECast(clientMem))[headerIdx].specific.dataRecord.varID == varID + && (wHECast(clientMem))[headerIdx].id == conversion); + size_t charOfs = (size_t)((clientMem + + (wHECast(clientMem))[headerIdx].offset) + - clientBuf[0].mem); + int partSize = partSizes[clientIdx+1]; + // FIXME: prevent overflow in second expression + needAExt |= (charOfs % elemSize != 0) + | (charOfs + (size_t)partSize * elemSize + > (size_t)INT_MAX * elemSize); + } + extRes.needAExt = needAExt; + if (needAExt) + { + extRes.partExts = partAExts + = Realloc(partExts, numClients * sizeof (partAExts[0])); + partExts = NULL; + } +#endif + for (size_t clientIdx = 0; clientIdx < numClients; ++clientIdx) + { + unsigned char *clientMem = clientBuf[clientIdx].mem; + size_t charOfs = (size_t)((clientMem + + (wHECast(clientMem))[headerIdx].offset) + - clientBuf[0].mem); + int partSize = partSizes[clientIdx+1]; +#ifndef USE_XT_REDIST_P2P_AEXT_NEW + xassert(charOfs % elemSize == 0 + && charOfs / elemSize + (size_t)partSize <= INT_MAX); +#else + if (needAExt) + { + partAExts[clientIdx].start = (MPI_Aint)charOfs; + partAExts[clientIdx].size = partSize; + partAExts[clientIdx].stride = (MPI_Aint)elemSize; + } + else +#endif + { + int elemOfs = (int)(charOfs / elemSize); + partExts[clientIdx].start = elemOfs; + partExts[clientIdx].size = partSize; + partExts[clientIdx].stride = 1; + } + } + return extRes; +} + + static Xt_redist buildVarRedist(int headerIdx, size_t streamIdx, /* index list representing the data elements gathered on @@ -833,7 +897,6 @@ buildVarRedist(int headerIdx, size_t streamIdx, size_t elemSize = conversion == DATA_HEADER_FLOAT ? sizeof (float) : sizeof (double); size_t numClients = (size_t)numClients_; - struct Xt_offset_ext *partExts = Malloc(numClients * sizeof (partExts[0])); MPI_Comm pioInterComm = cdiPioInqInterComm(), collComm = commInqCommColl(); Xt_xmap gatherXmap; @@ -842,11 +905,13 @@ buildVarRedist(int headerIdx, size_t streamIdx, .size = xt_idxlist_get_num_indices(dstList), .stride = 1 }; Xt_uid *restrict uids = NULL; + void *tmpBuf = NULL; int *restrict partSizes = NULL; bool cacheXmaps = conf->cacheXmaps; if (cacheXmaps) { allocUIDLookup(numClients, &uids, &partSizes); + tmpBuf = uids; uids[0] = xt_idxlist_get_uid(dstList); for (size_t clientIdx = 0; clientIdx < numClients; ++clientIdx) { @@ -859,47 +924,48 @@ buildVarRedist(int headerIdx, size_t streamIdx, } if ((gatherXmap = cdiPioXmapCacheLookup(XmapCache, uids, partSizes))) { - for (size_t clientIdx = 0; clientIdx < numClients; ++clientIdx) - { - unsigned char *clientMem = clientBuf[clientIdx].mem; - struct dataRecord *dataHeader - = &(wHECast(clientMem))[headerIdx].specific.dataRecord; - xassert(dataHeader->varID == varID - && (wHECast(clientMem))[headerIdx].id == conversion); - size_t charOfs = (size_t)((clientMem - + (wHECast(clientMem))[headerIdx].offset) - - clientBuf[0].mem); - int partSize = partSizes[clientIdx+1]; - xassert(charOfs % elemSize == 0 - && charOfs / elemSize + (size_t)partSize <= INT_MAX); - int elemOfs = (int)(charOfs / elemSize); - partExts[clientIdx].start = elemOfs; - partExts[clientIdx].size = partSize; - partExts[clientIdx].stride = 1; - } - goto finishXmapCaching; + goto afterNewXmapCacheEntry; } } - gatherXmap = buildVarXmap(partExts, clientBuf, (size_t)headerIdx, + else + { + tmpBuf = partSizes = Malloc(sizeof (*partSizes) * (numClients + 1)); + } + partSizes[0] = gatherExt.size; + gatherXmap = buildVarXmap(partSizes, clientBuf, (size_t)headerIdx, dstList, partDescPreset, pioInterComm, collComm, varID, conf); if (cacheXmaps) { - partSizes[0] = gatherExt.size; - for (size_t i = 0; i < numClients; ++i) - partSizes[i+1] = partExts[i].size; cdiPioXmapCacheAdd(XmapCache, uids, partSizes, gatherXmap); - finishXmapCaching: - Free(uids); } + afterNewXmapCacheEntry:; + struct partExtRes extRes + = fillPartExts(partSizes, clientBuf, (size_t)headerIdx); + Free(tmpBuf); MPI_Datatype elemDt = conversion == DATA_HEADER_FLOAT ? MPI_FLOAT : MPI_DOUBLE; - Xt_redist varRedist - = xt_redist_p2p_ext_new(gatherXmap, (int)numClients, partExts, 1, - &gatherExt, elemDt); + Xt_redist varRedist; +#if USE_XT_REDIST_P2P_AEXT_NEW + if (extRes.needAExt) + { + struct Xt_aoffset_ext gatherAExt + = { .start = 0, + .size = gatherExt.size, + .stride = (MPI_Aint)elemSize }; + + varRedist + = xt_redist_p2p_aext_new(gatherXmap, (int)numClients, extRes.partExts, + 1, &gatherAExt, elemDt); + } + else +#endif + varRedist + = xt_redist_p2p_ext_new(gatherXmap, (int)numClients, extRes.partExts, + 1, &gatherExt, elemDt); if (!cacheXmaps) xt_xmap_delete(gatherXmap); - Free(partExts); + Free(extRes.partExts); return varRedist; } -- GitLab