Commit dfab4839 authored by Uwe Schulzweida
Browse files

usvs_to_uv: if ( rla < 0.0 ) rla += 360.0 (bug fix)

parent ba08f2d5
......@@ -3,6 +3,10 @@
* using CDI library version 1.7.0
* Version 1.7.0 released
2015-09-30 Uwe Schulzweida
* usvs_to_uv: if ( rla < 0.0 ) rla += 360.0 (bug fix) [report: Laura Niederdrenk]
2015-09-25 Uwe Schulzweida
* added operator sortparam to sort all variables by their parameter number
......
......@@ -187,6 +187,7 @@ void usvs_to_uv(double us, double vs, double phi, double rla,
/* umrechnung von grad in bogenmass */
zpolphi = polphi*DEG2RAD;
zpollam = pollam*DEG2RAD;
if ( rla < 0.0 ) rla += 360.0;
zrla = rla *DEG2RAD;
pollamd = pollam;
if ( pollamd < 0.0 ) pollamd += 360.0;
......
......@@ -703,12 +703,12 @@ void remap_conserv_weights(remapgrid_t *src_grid, remapgrid_t *tgt_grid, remapva
/* Loop over destination grid */
#if defined(_OPENMP)
#pragma omp parallel for default(shared) \
#pragma omp parallel for default(none) \
shared(ompNumThreads, lyac, nbins, num_wts, src_remap_grid_type, tgt_remap_grid_type, src_grid_bound_box, \
src_edge_type, tgt_edge_type, partial_areas2, partial_weights2, \
rv, cdoVerbose, max_srch_cells2, tgt_num_cell_corners, target_cell_type, \
weightlinks, \
srch_corners, src_grid, tgt_grid, tgt_grid_size, src_grid_size, \
srch_corners, src_grid, tgt_grid, tgt_grid_size, src_grid_size, nx, \
overlap_buffer2, src_grid_cells2, srch_add2, tgt_grid_cell2, findex, sum_srch_cells, sum_srch_cells2) \
private(srch_add, tgt_grid_cell, tgt_area, k, num_srch_cells, max_srch_cells, \
partial_areas, partial_weights, overlap_buffer, src_grid_cells, src_cell_add, ioffset)
......
/*
 * fun1 - element-wise accumulation: array1[i] += array2[i] for i in [0, nelem).
 *
 * nelem   number of elements to process; 0 leaves array1 untouched
 * array1  destination, updated in place
 * array2  addend, only read
 *
 * array1 is restrict-qualified and written, so it must not overlap array2.
 * When built with OpenMP the simd directive declares both pointers as
 * 64-byte aligned; callers must then pass storage that really is aligned
 * to 64 bytes.
 *
 * The element count and the loop index are both unsigned, so the loop
 * condition never compares a signed with an unsigned value.
 */
void fun1(const unsigned nelem, double *restrict array1, const double *restrict array2)
{
#if defined(_OPENMP)
#pragma omp simd aligned(array1:64) aligned(array2:64)
#endif
  for ( unsigned i = 0; i < nelem; ++i )
    {
      array1[i] += array2[i];
    }
}
/*
 * fun2 - element-wise multiply-accumulate:
 *        array1[i] += array2[i]*array3[i] for i in [0, nelem).
 *
 * nelem   number of elements to process; 0 leaves array1 untouched
 * array1  destination, updated in place
 * array2  first factor, only read
 * array3  second factor, only read
 *
 * array1 is restrict-qualified and written, so it must not overlap either
 * input array. When built with OpenMP the simd directive declares all three
 * pointers as 64-byte aligned; callers must then pass storage that really
 * is aligned to 64 bytes.
 *
 * The element count and the loop index are both unsigned, so the loop
 * condition never compares a signed with an unsigned value.
 */
void fun2(const unsigned nelem, double *restrict array1, const double *restrict array2, const double *restrict array3)
{
#if defined(_OPENMP)
#pragma omp simd aligned(array1:64) aligned(array2:64) aligned(array3:64)
#endif
  for ( unsigned i = 0; i < nelem; ++i )
    {
      array1[i] += array2[i]*array3[i];
    }
}
// aligned access gives 3-5% speedup
// icc -std=c99 -O2 -xCORE-AVX2 -qopt-report=5 -openmp memalign.c fun.c
// gcc -std=c99 -O3 -march=native -ftree-vectorize -fdump-tree-vect-blocks -fopt-info-optimized -fopenmp memalign.c fun.c
// icc -g -std=c99 -O2 -xCORE-AVX2 -qopt-report=5 -openmp memalign.c fun.c
// gcc -g -std=c99 -O3 -march=native -ftree-vectorize -fdump-tree-vect-blocks -fopt-info-optimized -fopenmp memalign.c fun.c
/*
ICC16/hama2:
fun1 fun2
SSE3 12.7 16.8 unaligned
SSE3 12.4 13.7 aligned
AVX2 10.4 10.4 unaligned
AVX2 10.4 10.4 aligned
*/
/*
#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE 600
#endif
/*
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200112L
#endif
*/
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
//#define NITER 200000000
/* Number of times each kernel is invoked by the timing loops. */
#define NITER 2000000

/*
 * Prototypes of the kernels defined in fun.c. There is no shared header, so
 * they are repeated here and must match the definitions exactly: the element
 * count is unsigned and the input arrays are pointers to const double.
 */
void fun1(const unsigned nelem, double *restrict array1, const double *restrict array2);
void fun2(const unsigned nelem, double *restrict array1, const double *restrict array2, const double *restrict array3);
void print_opt(void)
{
......@@ -57,28 +69,34 @@ int get_alignment(double *ptr)
int main(void)
{
int nelem = 97;
double start_time;
// int nelem = 97;
int nelem = 4*4096;
double *array1, *array2;
print_opt();
malloc(nelem*sizeof(double));
//array1 = (double *) malloc(nelem*sizeof(double));
posix_memalign((void **)&array1, 64, nelem*sizeof(double));
array1 = (double *) malloc(nelem*sizeof(double));
//posix_memalign((void **)&array1, 64, nelem*sizeof(double));
malloc(nelem*sizeof(double));
//array2 = (double *) malloc(nelem*sizeof(double));
posix_memalign((void **)&array2, 64, nelem*sizeof(double));
array2 = (double *) malloc(nelem*sizeof(double));
//posix_memalign((void **)&array2, 64, nelem*sizeof(double));
printf("mem alignment: %d %d\n", get_alignment(array1), get_alignment(array2));
for ( int i = 0; i < nelem; ++i ) array1[i] = 0;
for ( int i = 0; i < nelem; ++i ) array2[i] = 1;
/*
for ( int i = 0; i < 200000000; ++i )
start_time = omp_get_wtime();
for ( int i = 0; i < NITER; ++i )
fun1(nelem, array1, array2);
*/
for ( int i = 0; i < 200000000; ++i )
printf("\n fun1 in %lf seconds\n ",omp_get_wtime() - start_time);
start_time = omp_get_wtime();
for ( int i = 0; i < NITER; ++i )
fun2(nelem, array1, array2, array2);
printf("\n fun2 in %lf seconds\n ",omp_get_wtime() - start_time);
free(array1);
free(array2);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment