Commit c96bcd5f authored by Uwe Schulzweida
Browse files

remapnn: optimize memory handling

parent 0d067741
......@@ -184,6 +184,7 @@ kd_doBuildTree(void *threadarg)
else qcomp = _compPoints2;
pmergesort(points, nPoints, sizeof(struct kd_point), qcomp, max_threads);
pivot = nPoints / 2;
if ((node = kd_allocNode(points, pivot, min, max, sortaxis, dim)) == NULL)
return NULL;
......
......@@ -106,6 +106,7 @@ mergesort_t(void *args)
perror("pthread_create");
return NULL;
}
pthread_join(thr[0], NULL);
pthread_join(thr[1], NULL);
......
......@@ -242,7 +242,7 @@ void scrip_remap_bilinear_weights(remapgrid_t *src_grid, remapgrid_t *tgt_grid,
/* Successfully found iw,jw - compute weights */
set_bilinear_weights(iw, jw, wgts);
store_weightlinks(4, src_add, wgts, tgt_cell_add, weightlinks);
store_weightlinks(1, 4, src_add, wgts, tgt_cell_add, weightlinks);
}
else
{
......@@ -264,12 +264,12 @@ void scrip_remap_bilinear_weights(remapgrid_t *src_grid, remapgrid_t *tgt_grid,
tgt_grid->cell_frac[tgt_cell_add] = 1.;
store_weightlinks(4, src_add, wgts, tgt_cell_add, weightlinks);
store_weightlinks(1, 4, src_add, wgts, tgt_cell_add, weightlinks);
}
}
}
weightlinks2remaplinks(tgt_grid_size, weightlinks, rv);
weightlinks2remaplinks(1, tgt_grid_size, weightlinks, rv);
if ( weightlinks ) Free(weightlinks);
......
......@@ -1065,7 +1065,7 @@ void remap_conserv_weights(remapgrid_t *src_grid, remapgrid_t *tgt_grid, remapva
tgt_grid->cell_frac[tgt_cell_add] += partial_weight;
}
store_weightlinks(num_weights, srch_add, partial_weights, tgt_cell_add, weightlinks);
store_weightlinks(1, num_weights, srch_add, partial_weights, tgt_cell_add, weightlinks);
tgt_grid->cell_area[tgt_cell_add] = tgt_area;
// printf("area %d %g %g\n", tgt_cell_add, tgt_grid->cell_area[tgt_cell_add], tgt_area);
......@@ -1110,7 +1110,7 @@ void remap_conserv_weights(remapgrid_t *src_grid, remapgrid_t *tgt_grid, remapva
Free(srch_add2[i]);
}
weightlinks2remaplinks(tgt_grid_size, weightlinks, rv);
weightlinks2remaplinks(1, tgt_grid_size, weightlinks, rv);
if ( weightlinks ) Free(weightlinks);
......
......@@ -369,11 +369,21 @@ void remap_distwgt_weights(unsigned num_neighbors, remapgrid_t *src_grid, remapg
unsigned ny = src_grid->dims[1];
weightlinks_t *weightlinks = (weightlinks_t *) Malloc(tgt_grid_size*sizeof(weightlinks_t));
weightlinks[0].addweights = (addweight_t *) Malloc(num_neighbors*tgt_grid_size*sizeof(addweight_t));
for ( unsigned tgt_cell_add = 1; tgt_cell_add < tgt_grid_size; ++tgt_cell_add )
weightlinks[tgt_cell_add].addweights = weightlinks[0].addweights + num_neighbors*tgt_cell_add;
int nbr_mask[num_neighbors]; /* mask at nearest neighbors */
int nbr_add[num_neighbors]; /* source address at nearest neighbors */
double nbr_dist[num_neighbors]; /* angular distance four nearest neighbors */
#if defined(_OPENMP)
double omp_get_wtime(void);
double start = 0;
if ( cdoVerbose ) start = omp_get_wtime();
#endif
struct gridsearch *gs = NULL;
if ( remap_grid_type == REMAP_GRID_TYPE_REG2D )
gs = gridsearch_create_reg2d(nx, ny, src_grid->reg2d_center_lon, src_grid->reg2d_center_lat);
......@@ -382,6 +392,11 @@ void remap_distwgt_weights(unsigned num_neighbors, remapgrid_t *src_grid, remapg
else
gs = gridsearch_create(src_grid_size, src_grid->cell_center_lon, src_grid->cell_center_lat);
#if defined(_OPENMP)
if ( cdoVerbose ) printf("gridsearch created: %.2f seconds\n", omp_get_wtime()-start);
if ( cdoVerbose ) start = omp_get_wtime();
#endif
/* Loop over destination grid */
double findex = 0;
......@@ -424,17 +439,21 @@ void remap_distwgt_weights(unsigned num_neighbors, remapgrid_t *src_grid, remapg
for ( unsigned n = 0; n < nadds; ++n )
if ( nbr_mask[n] ) tgt_grid->cell_frac[tgt_cell_add] = ONE;
store_weightlinks(nadds, nbr_add, nbr_dist, tgt_cell_add, weightlinks);
store_weightlinks(0, nadds, nbr_add, nbr_dist, tgt_cell_add, weightlinks);
}
progressStatus(0, 1, 1);
if ( gs ) gridsearch_delete(gs);
weightlinks2remaplinks(tgt_grid_size, weightlinks, rv);
weightlinks2remaplinks(0, tgt_grid_size, weightlinks, rv);
if ( weightlinks ) Free(weightlinks);
#if defined(_OPENMP)
if ( cdoVerbose ) printf("gridsearch nearest: %.2f seconds\n", omp_get_wtime()-start);
#endif
} /* scrip_remap_weights_distwgt */
static
......@@ -465,9 +484,12 @@ void remap_distwgt(unsigned num_neighbors, remapgrid_t *src_grid, remapgrid_t *t
int nbr_add[num_neighbors]; /* source address at nearest neighbors */
double nbr_dist[num_neighbors]; /* angular distance four nearest neighbors */
clock_t start;
#if defined(_OPENMP)
double omp_get_wtime(void);
double start = 0;
start = clock();
if ( cdoVerbose ) start = omp_get_wtime();
#endif
struct gridsearch *gs = NULL;
if ( src_remap_grid_type == REMAP_GRID_TYPE_REG2D )
......@@ -477,9 +499,10 @@ void remap_distwgt(unsigned num_neighbors, remapgrid_t *src_grid, remapgrid_t *t
else
gs = gridsearch_create(src_grid_size, src_grid->cell_center_lon, src_grid->cell_center_lat);
if ( cdoVerbose ) printf("gridsearch created: %.2f seconds\n", ((double)(clock()-start))/CLOCKS_PER_SEC);
start = clock();
#if defined(_OPENMP)
if ( cdoVerbose ) printf("gridsearch created: %.2f seconds\n", omp_get_wtime()-start);
if ( cdoVerbose ) start = omp_get_wtime();
#endif
/* Loop over destination grid */
......@@ -533,6 +556,8 @@ void remap_distwgt(unsigned num_neighbors, remapgrid_t *src_grid, remapgrid_t *t
if ( gs ) gridsearch_delete(gs);
if ( cdoVerbose ) printf("gridsearch nearest: %.2f seconds\n", ((double)(clock()-start))/CLOCKS_PER_SEC);
#if defined(_OPENMP)
if ( cdoVerbose ) printf("gridsearch nearest: %.2f seconds\n", omp_get_wtime()-start);
#endif
} /* scrip_remap_distwgt */
......@@ -3,7 +3,6 @@
#include "remap.h"
#include "remap_store_link.h"
static
int cmp_adds(const void *s1, const void *s2)
{
......@@ -31,9 +30,9 @@ int cmp_adds4(const void *s1, const void *s2)
}
static
void sort_addweights(size_t num_weights, addweight_t *addweights)
void sort_addweights(unsigned num_weights, addweight_t *addweights)
{
size_t n;
unsigned n;
for ( n = 1; n < num_weights; ++n )
if ( addweights[n].add < addweights[n-1].add ) break;
......@@ -43,9 +42,9 @@ void sort_addweights(size_t num_weights, addweight_t *addweights)
}
static
void sort_addweights4(size_t num_weights, addweight4_t *addweights)
void sort_addweights4(unsigned num_weights, addweight4_t *addweights)
{
size_t n;
unsigned n;
for ( n = 1; n < num_weights; ++n )
if ( addweights[n].add < addweights[n-1].add ) break;
......@@ -55,9 +54,9 @@ void sort_addweights4(size_t num_weights, addweight4_t *addweights)
}
void sort_add_and_wgts(size_t num_weights, int *src_add, double *wgts)
void sort_add_and_wgts(unsigned num_weights, int *src_add, double *wgts)
{
size_t n;
unsigned n;
for ( n = 1; n < num_weights; ++n )
if ( src_add[n] < src_add[n-1] ) break;
......@@ -84,9 +83,9 @@ void sort_add_and_wgts(size_t num_weights, int *src_add, double *wgts)
}
void sort_add_and_wgts4(size_t num_weights, int *src_add, double wgts[4][4])
void sort_add_and_wgts4(unsigned num_weights, int *src_add, double wgts[4][4])
{
size_t n;
unsigned n;
for ( n = 1; n < num_weights; ++n )
if ( src_add[n] < src_add[n-1] ) break;
......@@ -99,7 +98,7 @@ void sort_add_and_wgts4(size_t num_weights, int *src_add, double wgts[4][4])
for ( n = 0; n < num_weights; ++n )
{
addweights[n].add = src_add[n];
for ( long k = 0; k < 4; ++k )
for ( unsigned k = 0; k < 4; ++k )
addweights[n].weight[k] = wgts[n][k];
}
......@@ -108,36 +107,42 @@ void sort_add_and_wgts4(size_t num_weights, int *src_add, double wgts[4][4])
for ( n = 0; n < num_weights; ++n )
{
src_add[n] = addweights[n].add;
for ( long k = 0; k < 4; ++k )
for ( unsigned k = 0; k < 4; ++k )
wgts[n][k] = addweights[n].weight[k];
}
}
}
/*
 * store_weightlinks: record the (source address, weight) pairs of one target
 * cell in weightlinks[cell_add].
 *
 * NOTE(review): this span looks like a diff rendering whose +/- markers were
 * lost, so pre-change and post-change lines are interleaved (two signatures,
 * two loop headers, two sort calls). The notes below describe the variant
 * that takes `lalloc`; confirm against the real file before relying on them.
 *
 * lalloc      - non-zero: Malloc a fresh addweights buffer for this cell and
 *               store it in weightlinks[cell_add].addweights;
 *               zero: fill the buffer already stored in
 *               weightlinks[cell_add].addweights.
 * num_weights - number of entries read from srch_add[] and weights[].
 * srch_add    - source addresses, copied into addweights[n].add.
 * weights     - weights, copied into addweights[n].weight.
 * cell_add    - index of the weightlinks[] entry to fill.
 * weightlinks - array of per-cell link records.
 */
void store_weightlinks(long num_weights, int *srch_add, double *weights, long cell_add, weightlinks_t *weightlinks)
void store_weightlinks(int lalloc, unsigned num_weights, int *srch_add, double *weights, unsigned cell_add, weightlinks_t *weightlinks)
{
/* Reset the entry first so a cell with num_weights == 0 ends up with nlinks == 0. */
weightlinks[cell_add].nlinks = 0;
weightlinks[cell_add].offset = 0;
if ( num_weights )
{
addweight_t *addweights = (addweight_t *) Malloc(num_weights*sizeof(addweight_t));
for ( long n = 0; n < num_weights; ++n )
addweight_t *addweights = NULL;
/* Either allocate a per-cell buffer or reuse the one the caller already attached. */
if ( lalloc )
addweights = (addweight_t *) Malloc(num_weights*sizeof(addweight_t));
else
addweights = weightlinks[cell_add].addweights;
for ( unsigned n = 0; n < num_weights; ++n )
{
addweights[n].add = srch_add[n];
addweights[n].weight = weights[n];
}
sort_addweights(num_weights, addweights);
/* The sort call is skipped when there is only a single entry. */
if ( num_weights > 1 ) sort_addweights(num_weights, addweights);
weightlinks[cell_add].addweights = addweights;
weightlinks[cell_add].nlinks = num_weights;
weightlinks[cell_add].nlinks = num_weights;
/* Only a freshly allocated buffer needs to be stored back into the entry. */
if ( lalloc ) weightlinks[cell_add].addweights = addweights;
}
}
void store_weightlinks4(long num_weights, int *srch_add, double weights[4][4], long cell_add, weightlinks4_t *weightlinks)
void store_weightlinks4(unsigned num_weights, int *srch_add, double weights[4][4], unsigned cell_add, weightlinks4_t *weightlinks)
{
weightlinks[cell_add].nlinks = 0;
weightlinks[cell_add].offset = 0;
......@@ -145,10 +150,10 @@ void store_weightlinks4(long num_weights, int *srch_add, double weights[4][4], l
if ( num_weights )
{
addweight4_t *addweights = (addweight4_t *) Malloc(num_weights*sizeof(addweight4_t));
for ( long n = 0; n < num_weights; ++n )
for ( unsigned n = 0; n < num_weights; ++n )
{
addweights[n].add = srch_add[n];
for ( long k = 0; k < 4; ++k )
for ( unsigned k = 0; k < 4; ++k )
addweights[n].weight[k] = weights[n][k];
}
......@@ -160,11 +165,11 @@ void store_weightlinks4(long num_weights, int *srch_add, double weights[4][4], l
}
void weightlinks2remaplinks(long tgt_grid_size, weightlinks_t *weightlinks, remapvars_t *rv)
void weightlinks2remaplinks(int lalloc, unsigned tgt_grid_size, weightlinks_t *weightlinks, remapvars_t *rv)
{
long nlinks = 0;
unsigned nlinks = 0;
for ( long tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add )
for ( unsigned tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add )
{
if ( weightlinks[tgt_cell_add].nlinks )
{
......@@ -183,39 +188,47 @@ void weightlinks2remaplinks(long tgt_grid_size, weightlinks_t *weightlinks, rema
int *restrict src_cell_adds = rv->src_cell_add;
int *restrict tgt_cell_adds = rv->tgt_cell_add;
double *restrict wts = rv->wts;
#if defined(_OPENMP)
#pragma omp parallel for schedule(dynamic) default(none) shared(src_cell_adds,tgt_cell_adds,wts,weightlinks,tgt_grid_size)
#pragma omp parallel for schedule(static) default(none) shared(src_cell_adds,tgt_cell_adds,wts,weightlinks,tgt_grid_size)
#endif
for ( long tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add )
for ( unsigned tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add )
{
long num_links = weightlinks[tgt_cell_add].nlinks;
unsigned num_links = weightlinks[tgt_cell_add].nlinks;
if ( num_links )
{
long offset = weightlinks[tgt_cell_add].offset;
unsigned offset = weightlinks[tgt_cell_add].offset;
addweight_t *addweights = weightlinks[tgt_cell_add].addweights;
for ( long ilink = 0; ilink < num_links; ++ilink )
for ( unsigned ilink = 0; ilink < num_links; ++ilink )
{
src_cell_adds[offset+ilink] = addweights[ilink].add;
tgt_cell_adds[offset+ilink] = tgt_cell_add;
wts[offset+ilink] = addweights[ilink].weight;
}
#ifdef _OPENMP
free(addweights);
#else
Free(addweights);
#endif
}
}
if ( lalloc )
{
for ( unsigned tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add )
{
unsigned num_links = weightlinks[tgt_cell_add].nlinks;
if ( num_links ) Free(weightlinks[tgt_cell_add].addweights);
}
}
else
{
Free(weightlinks[0].addweights);
}
}
}
void weightlinks2remaplinks4(long tgt_grid_size, weightlinks4_t *weightlinks, remapvars_t *rv)
void weightlinks2remaplinks4(unsigned tgt_grid_size, weightlinks4_t *weightlinks, remapvars_t *rv)
{
long nlinks = 0;
unsigned nlinks = 0;
for ( long tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add )
for ( unsigned tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add )
{
if ( weightlinks[tgt_cell_add].nlinks )
{
......@@ -238,18 +251,18 @@ void weightlinks2remaplinks4(long tgt_grid_size, weightlinks4_t *weightlinks, re
#if defined(_OPENMP)
#pragma omp parallel for default(none) shared(src_cell_adds,tgt_cell_adds,wts,weightlinks,tgt_grid_size)
#endif
for ( long tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add )
for ( unsigned tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add )
{
long num_links = weightlinks[tgt_cell_add].nlinks;
unsigned num_links = weightlinks[tgt_cell_add].nlinks;
if ( num_links )
{
long offset = weightlinks[tgt_cell_add].offset;
unsigned offset = weightlinks[tgt_cell_add].offset;
addweight4_t *addweights = weightlinks[tgt_cell_add].addweights;
for ( long ilink = 0; ilink < num_links; ++ilink )
for ( unsigned ilink = 0; ilink < num_links; ++ilink )
{
src_cell_adds[offset+ilink] = addweights[ilink].add;
tgt_cell_adds[offset+ilink] = tgt_cell_add;
for ( long k = 0; k < 4; ++k )
for ( unsigned k = 0; k < 4; ++k )
wts[(offset+ilink)*4+k] = addweights[ilink].weight[k];
}
#ifdef _OPENMP
......
......@@ -15,24 +15,24 @@ typedef struct
} addweight4_t;
/*
 * Per-target-cell link record holding addweight_t entries.
 *
 * NOTE(review): the `int` and `unsigned` declarations of nlinks/offset appear
 * to be the pre- and post-change lines of a diff shown without +/- markers;
 * only one pair can exist in the real header -- confirm which.
 */
typedef struct {
int nlinks;
int offset;
unsigned nlinks;         /* number of entries in addweights */
unsigned offset;         /* index at which this cell's links are written in the remap link arrays */
addweight_t *addweights; /* array of nlinks entries */
} weightlinks_t;
/*
 * Per-target-cell link record holding addweight4_t entries.
 *
 * NOTE(review): the `int` and `unsigned` declarations of nlinks/offset appear
 * to be the pre- and post-change lines of a diff shown without +/- markers;
 * only one pair can exist in the real header -- confirm which.
 */
typedef struct {
int nlinks;
int offset;
unsigned nlinks;          /* number of entries in addweights */
unsigned offset;          /* index at which this cell's links are written in the remap link arrays */
addweight4_t *addweights; /* array of nlinks entries */
} weightlinks4_t;
/*
 * Public interface of the weight-link store.
 *
 * NOTE(review): the first six prototypes (long/size_t parameters) and the last
 * six (unsigned parameters, extra `lalloc` argument) appear to be the pre- and
 * post-change lines of a diff shown without +/- markers; confirm which set the
 * real header contains.
 */
void store_weightlinks(long num_weights, int *srch_add, double *weights, long cell_add, weightlinks_t *weightlinks);
void store_weightlinks4(long num_weights, int *srch_add, double weights[4][4], long cell_add, weightlinks4_t *weightlinks);
void weightlinks2remaplinks(long tgt_grid_size, weightlinks_t *weightlinks, remapvars_t *rv);
void weightlinks2remaplinks4(long tgt_grid_size, weightlinks4_t *weightlinks, remapvars_t *rv);
void sort_add_and_wgts(size_t num_weights, int *src_add, double *wgts);
void sort_add_and_wgts4(size_t num_weights, int *src_add, double wgts[4][4]);
/* lalloc != 0: a buffer is Malloc'ed for each cell; lalloc == 0: the buffer already attached to the cell is used. */
void store_weightlinks(int lalloc, unsigned num_weights, int *srch_add, double *weights, unsigned cell_add, weightlinks_t *weightlinks);
void store_weightlinks4(unsigned num_weights, int *srch_add, double weights[4][4], unsigned cell_add, weightlinks4_t *weightlinks);
/* lalloc != 0: each cell's buffer is freed separately; lalloc == 0: only weightlinks[0].addweights is freed. */
void weightlinks2remaplinks(int lalloc, unsigned tgt_grid_size, weightlinks_t *weightlinks, remapvars_t *rv);
void weightlinks2remaplinks4(unsigned tgt_grid_size, weightlinks4_t *weightlinks, remapvars_t *rv);
void sort_add_and_wgts(unsigned num_weights, int *src_add, double *wgts);
void sort_add_and_wgts4(unsigned num_weights, int *src_add, double wgts[4][4]);
#endif /* _REMAP_STORE_LINK */
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment