Commit d937ef14 authored by Uwe Schulzweida
Browse files

Replace omp atomic with reduction.

parent 87fe82a0
......@@ -88,8 +88,8 @@ void *ensstat_func(void *ensarg)
double missval = vlistInqVarMissval(arg->vlistID1, arg->varID[t]);
size_t nmiss = 0;
#if defined(_OPENMP)
#pragma omp parallel for default(shared)
#ifdef HAVE_OPENMP4
#pragma omp parallel for default(shared) reduction(+:nmiss)
#endif
for ( int i = 0; i < gridsize; ++i )
{
......@@ -109,13 +109,7 @@ void *ensstat_func(void *ensarg)
arg->array2[i] = arg->lpctl ? fldpctl(field[ompthID], arg->pn) : fldfun(field[ompthID], arg->operfunc);
if ( DBL_IS_EQUAL(arg->array2[i], field[ompthID].missval) )
{
#if defined(_OPENMP)
#include "pragma_omp_atomic_update.h"
#endif
nmiss++;
}
if ( DBL_IS_EQUAL(arg->array2[i], field[ompthID].missval) ) nmiss++;
if ( arg->count_data ) arg->count2[i] = nfiles - field[ompthID].nmiss;
}
......
......@@ -364,15 +364,12 @@ void setmisstodis(field_type *field1, field_type *field2, int num_neighbors)
double findex = 0;
#if defined(_OPENMP)
#pragma omp parallel for default(none) shared(nbr_mask, nbr_add, nbr_dist) \
shared(findex, mindex, vindex, array1, array2, xvals, yvals, gs, nmiss, num_neighbors)
#ifdef HAVE_OPENMP4
#pragma omp parallel for default(none) reduction(+:findex) shared(nbr_mask, nbr_add, nbr_dist) \
shared(mindex, vindex, array1, array2, xvals, yvals, gs, nmiss, num_neighbors)
#endif
for ( unsigned i = 0; i < nmiss; ++i )
{
#if defined(_OPENMP)
#include "pragma_omp_atomic_update.h"
#endif
findex++;
if ( cdo_omp_get_thread_num() == 0 ) progressStatus(0, 1, findex/nmiss);
......
......@@ -489,9 +489,6 @@ void gridboxstat(field_type *field1, field_type *field2, size_t xinc, size_t yin
int lprogress = 1;
#if defined(_OPENMP)
if ( ompthID != 0 ) lprogress = 0;
#endif
#if defined(_OPENMP)
#include "pragma_omp_atomic_update.h"
#endif
findex++;
if ( lprogress ) progressStatus(0, 1, findex/nlat2*nlon2);
......
......@@ -95,7 +95,6 @@ size_t smooth_knn_normalize_weights(size_t num_neighbors, double dist_tot, struc
static
void smooth(int gridID, double missval, const double *restrict array1, double *restrict array2, size_t *nmiss, smoothpoint_t spoint)
{
*nmiss = 0;
int gridID0 = gridID;
size_t gridsize = gridInqSize(gridID);
size_t num_neighbors = spoint.maxpoints;
......@@ -150,18 +149,17 @@ void smooth(int gridID, double missval, const double *restrict array1, double *r
start = clock();
size_t nmissx = 0;
double findex = 0;
#if defined(_OPENMP)
#pragma omp parallel for schedule(dynamic) default(none) shared(cdoVerbose, knn, spoint, findex, mask, array1, array2, xvals, yvals, gs, gridsize, nmiss, missval)
#ifdef HAVE_OPENMP4
#pragma omp parallel for schedule(dynamic) default(none) reduction(+:findex) reduction(+:nmissx) \
shared(cdoVerbose, knn, spoint, mask, array1, array2, xvals, yvals, gs, gridsize, missval)
#endif
for ( size_t i = 0; i < gridsize; ++i )
{
int ompthID = cdo_omp_get_thread_num();
#if defined(_OPENMP)
#include "pragma_omp_atomic_update.h"
#endif
findex++;
if ( cdoVerbose && cdo_omp_get_thread_num() == 0 ) progressStatus(0, 1, findex/gridsize);
......@@ -187,14 +185,13 @@ void smooth(int gridID, double missval, const double *restrict array1, double *r
}
else
{
#if defined(_OPENMP)
#include "pragma_omp_atomic_update.h"
#endif
(*nmiss)++;
nmissx++;
array2[i] = missval;
}
}
*nmiss = nmissx;
finish = clock();
if ( cdoVerbose ) printf("gridsearch nearest: %.2f seconds\n", ((double)(finish-start))/CLOCKS_PER_SEC);
......
......@@ -401,18 +401,15 @@ int gridGenArea(int gridID, double* area)
progressInit();
#if defined(_OPENMP)
#pragma omp parallel for default(none) \
shared(findex,gridsize,area,nv,grid_corner_lon,grid_corner_lat,grid_center_lon,grid_center_lat)
#ifdef HAVE_OPENMP4
#pragma omp parallel for default(none) reduction(+:findex) \
shared(gridsize,area,nv,grid_corner_lon,grid_corner_lat,grid_center_lon,grid_center_lat)
#endif
for ( size_t i = 0; i < gridsize; ++i )
{
int lprogress = 1;
if ( cdo_omp_get_thread_num() != 0 ) lprogress = 0;
#if defined(_OPENMP)
#include "pragma_omp_atomic_update.h"
#endif
findex++;
if ( lprogress ) progressStatus(0, 1, findex/gridsize);
......
......@@ -195,7 +195,7 @@ void *gs_create_kdtree(size_t n, const double *restrict lons, const double *rest
kdata_t min[3], max[3];
min[0] = min[1] = min[2] = 1e9;
max[0] = max[1] = max[2] = -1e9;
#if defined(HAVE_OPENMP45)
#ifdef HAVE_OPENMP45
#pragma omp parallel for reduction(min: min[:3]) reduction(max: max[:3])
#endif
for ( size_t i = 0; i < n; i++ )
......@@ -236,7 +236,7 @@ void *gs_create_nanoflann(size_t n, const double *restrict lons, const double *r
max[0] = max[1] = max[2] = -1e9;
// Generating Point Cloud
pointcloud->pts.resize(n);
#if defined(HAVE_OPENMP45)
#ifdef HAVE_OPENMP45
#pragma omp parallel for reduction(min: min[:3]) reduction(max: max[:3])
#endif
for ( size_t i = 0; i < n; i++ )
......
......@@ -219,9 +219,9 @@ void intlinarr2(double missval, int lon_is_circular,
progressInit();
#if defined(_OPENMP)
#pragma omp parallel for default(none) \
shared(ompNumThreads, field, fieldm, x, y, xm, ym, nxm, nym, gridsize2, missval, findex, nlon1, lon_is_circular, grid1_mask)
#ifdef HAVE_OPENMP4
#pragma omp parallel for default(none) reduction(+:findex) \
shared(ompNumThreads, field, fieldm, x, y, xm, ym, nxm, nym, gridsize2, missval, nlon1, lon_is_circular, grid1_mask)
#endif
for ( size_t i = 0; i < gridsize2; ++i )
{
......@@ -231,9 +231,6 @@ void intlinarr2(double missval, int lon_is_circular,
field[i] = missval;
#if defined(_OPENMP)
#include "pragma_omp_atomic_update.h"
#endif
findex++;
if ( lprogress ) progressStatus(0, 1, findex/gridsize2);
......
......@@ -106,15 +106,12 @@ void scrip_remap_bicubic_weights(remapgrid_t *src_grid, remapgrid_t *tgt_grid, r
double findex = 0;
#if defined(_OPENMP)
#pragma omp parallel for default(none) \
shared(weightlinks, remap_grid_type, tgt_grid_size, src_grid, tgt_grid, rv, findex)
#ifdef HAVE_OPENMP4
#pragma omp parallel for default(none) reduction(+:findex) \
shared(weightlinks, remap_grid_type, tgt_grid_size, src_grid, tgt_grid, rv)
#endif
for ( size_t tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add )
{
#if defined(_OPENMP)
#include "pragma_omp_atomic_update.h"
#endif
findex++;
if ( cdo_omp_get_thread_num() == 0 ) progressStatus(0, 1, findex/tgt_grid_size);
......@@ -312,15 +309,12 @@ void scrip_remap_bicubic(remapgrid_t *src_grid, remapgrid_t *tgt_grid, const dou
double findex = 0;
#if defined(_OPENMP)
#pragma omp parallel for default(none) \
shared(remap_grid_type, tgt_grid_size, src_grid, tgt_grid, src_array, tgt_array, missval, grad1_lat, grad1_lon, grad1_latlon, findex)
#ifdef HAVE_OPENMP4
#pragma omp parallel for default(none) reduction(+:findex) \
shared(remap_grid_type, tgt_grid_size, src_grid, tgt_grid, src_array, tgt_array, missval, grad1_lat, grad1_lon, grad1_latlon)
#endif
for ( size_t tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add )
{
#if defined(_OPENMP)
#include "pragma_omp_atomic_update.h"
#endif
findex++;
if ( cdo_omp_get_thread_num() == 0 ) progressStatus(0, 1, findex/tgt_grid_size);
......
......@@ -187,15 +187,12 @@ void scrip_remap_bilinear_weights(remapgrid_t *src_grid, remapgrid_t *tgt_grid,
/* Loop over destination grid */
#if defined(_OPENMP)
#pragma omp parallel for default(none) schedule(static) \
shared(weightlinks, remap_grid_type, tgt_grid_size, src_grid, tgt_grid, rv, findex)
#ifdef HAVE_OPENMP4
#pragma omp parallel for default(none) schedule(static) reduction(+:findex) \
shared(weightlinks, remap_grid_type, tgt_grid_size, src_grid, tgt_grid, rv)
#endif
for ( size_t tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add )
{
#if defined(_OPENMP)
#include "pragma_omp_atomic_update.h"
#endif
findex++;
if ( cdo_omp_get_thread_num() == 0 ) progressStatus(0, 1, findex/tgt_grid_size);
......@@ -389,15 +386,12 @@ void scrip_remap_bilinear(remapgrid_t *src_grid, remapgrid_t *tgt_grid, const do
/* Loop over destination grid */
#if defined(_OPENMP)
#pragma omp parallel for default(none) schedule(static) \
shared(cdoSilentMode, remap_grid_type, tgt_grid_size, src_grid, tgt_grid, src_array, tgt_array, missval, findex)
#ifdef HAVE_OPENMP4
#pragma omp parallel for default(none) schedule(static) reduction(+:findex) \
shared(cdoSilentMode, remap_grid_type, tgt_grid_size, src_grid, tgt_grid, src_array, tgt_array, missval)
#endif
for ( size_t tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add )
{
#if defined(_OPENMP)
#include "pragma_omp_atomic_update.h"
#endif
findex++;
if ( cdo_omp_get_thread_num() == 0 ) progressStatus(0, 1, findex/tgt_grid_size);
......
......@@ -798,12 +798,12 @@ void remap_conserv_weights(remapgrid_t *src_grid, remapgrid_t *tgt_grid, remapva
// Loop over destination grid
#if defined(_OPENMP)
#pragma omp parallel for schedule(dynamic) default(none) \
#ifdef HAVE_OPENMP4
#pragma omp parallel for schedule(dynamic) default(none) reduction(+:findex) \
shared(ompNumThreads, src_remap_grid_type, tgt_remap_grid_type, src_grid_bound_box, \
rv, cdoVerbose, tgt_num_cell_corners, target_cell_type, \
weightlinks, srch_corners, src_grid, tgt_grid, tgt_grid_size, src_grid_size, \
search, srch_add, tgt_grid_cell, findex, sum_srch_cells, sum_srch_cells2)
search, srch_add, tgt_grid_cell, sum_srch_cells, sum_srch_cells2)
#endif
for ( size_t tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add )
{
......@@ -813,9 +813,6 @@ void remap_conserv_weights(remapgrid_t *src_grid, remapgrid_t *tgt_grid, remapva
size_t n, num_weights, num_weights_old;
int ompthID = cdo_omp_get_thread_num();
#if defined(_OPENMP)
#include "pragma_omp_atomic_update.h"
#endif
findex++;
if ( ompthID == 0 ) progressStatus(0, 1, findex/tgt_grid_size);
......
......@@ -1330,20 +1330,17 @@ void scrip_remap_conserv_weights(remapgrid_t *src_grid, remapgrid_t *tgt_grid, r
if ( cdoTimer ) timer_start(timer_remap_con_l1);
#if defined(_OPENMP)
#pragma omp parallel for default(none) \
#ifdef HAVE_OPENMP4
#pragma omp parallel for default(none) reduction(+:findex) \
shared(nbins, num_wts, src_centroid_lon, src_centroid_lat, \
remap_store_link_fast, grid_store, link_add1, link_add2, rv, cdoVerbose, max_subseg, \
srch_corner_lat, srch_corner_lon, max_srch_cells, \
src_num_cell_corners, srch_corners, src_grid, tgt_grid, tgt_grid_size, src_grid_size, srch_add, findex)
src_num_cell_corners, srch_corners, src_grid, tgt_grid, tgt_grid_size, src_grid_size, srch_add)
#endif
for ( long src_cell_add = 0; src_cell_add < src_grid_size; ++src_cell_add )
{
int ompthID = cdo_omp_get_thread_num();
#if defined(_OPENMP)
#include "pragma_omp_atomic_update.h"
#endif
findex++;
if ( ompthID == 0 ) progressStatus(0, 0.5, findex/src_grid_size);
......@@ -1537,20 +1534,17 @@ void scrip_remap_conserv_weights(remapgrid_t *src_grid, remapgrid_t *tgt_grid, r
findex = 0;
#if defined(_OPENMP)
#pragma omp parallel for default(none) \
#ifdef HAVE_OPENMP4
#pragma omp parallel for default(none) reduction(+:findex) \
shared(nbins, num_wts, tgt_centroid_lon, tgt_centroid_lat, \
remap_store_link_fast, grid_store, link_add1, link_add2, rv, cdoVerbose, max_subseg, \
srch_corner_lat, srch_corner_lon, max_srch_cells, \
tgt_num_cell_corners, srch_corners, src_grid, tgt_grid, tgt_grid_size, src_grid_size, srch_add, findex)
tgt_num_cell_corners, srch_corners, src_grid, tgt_grid, tgt_grid_size, src_grid_size, srch_add)
#endif
for ( long tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add )
{
int ompthID = cdo_omp_get_thread_num();
#if defined(_OPENMP)
#include "pragma_omp_atomic_update.h"
#endif
findex++;
if ( ompthID == 0 ) progressStatus(0.5, 0.5, findex/tgt_grid_size);
......
......@@ -397,16 +397,13 @@ void remap_distwgt_weights(size_t num_neighbors, remapgrid_t *src_grid, remapgri
double findex = 0;
#ifdef _OPENMP
#pragma omp parallel for default(none) \
shared(gs, weightlinks, num_neighbors, remap_grid_type, src_grid, tgt_grid, tgt_grid_size, findex) \
#ifdef HAVE_OPENMP4
#pragma omp parallel for default(none) reduction(+:findex) \
shared(gs, weightlinks, num_neighbors, remap_grid_type, src_grid, tgt_grid, tgt_grid_size) \
shared(nbr_mask, nbr_add, nbr_dist)
#endif
for ( size_t tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add )
{
#ifdef _OPENMP
#include "pragma_omp_atomic_update.h"
#endif
findex++;
if ( cdo_omp_get_thread_num() == 0 ) progressStatus(0, 1, findex/tgt_grid_size);
......@@ -504,16 +501,13 @@ void remap_distwgt(size_t num_neighbors, remapgrid_t *src_grid, remapgrid_t *tgt
double findex = 0;
#ifdef _OPENMP
#pragma omp parallel for default(none) \
shared(gs, num_neighbors, src_remap_grid_type, src_grid, tgt_grid, tgt_grid_size, findex) \
#ifdef HAVE_OPENMP4
#pragma omp parallel for default(none) reduction(+:findex) \
shared(gs, num_neighbors, src_remap_grid_type, src_grid, tgt_grid, tgt_grid_size) \
shared(src_array, tgt_array, missval, nbr_mask, nbr_add, nbr_dist)
#endif
for ( size_t tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add )
{
#ifdef _OPENMP
#include "pragma_omp_atomic_update.h"
#endif
findex++;
if ( cdo_omp_get_thread_num() == 0 ) progressStatus(0, 1, findex/tgt_grid_size);
......@@ -644,18 +638,15 @@ void intgriddis(field_type *field1, field_type *field2, size_t num_neighbors)
size_t nmiss = 0;
double findex = 0;
#ifdef _OPENMP
#ifdef HAVE_OPENMP4
/*
#pragma omp parallel for default(none) \
shared(gs, num_neighbors, src_grid, tgt_grid, tgt_grid_size, findex) \
#pragma omp parallel for default(none) reduction(+:findex) \
shared(gs, num_neighbors, src_grid, tgt_grid, tgt_grid_size) \
shared(src_array, tgt_array, missval, nbr_mask, nbr_add, nbr_dist)
*/
#endif
for ( size_t tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add )
{
#ifdef _OPENMP
//#include "pragma_omp_atomic_update.h"
#endif
findex++;
if ( cdo_omp_get_thread_num() == 0 ) progressStatus(0, 1, findex/tgt_grid_size);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment