Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OpenACC cleanup part 1 #247

Merged
merged 13 commits into from
Sep 19, 2023
Merged
94 changes: 75 additions & 19 deletions tools/fregrid/conserve_interp.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "constant.h"
#include "globals.h"
#include "create_xgrid.h"
#include "create_xgrid_acc.h"
#include "create_xgrid_util.h"
#include "mosaic_util.h"
#include "conserve_interp.h"
Expand All @@ -48,12 +49,10 @@ void setup_conserve_interp(int ntiles_in, const Grid_config *grid_in, int ntiles
Grid_config *grid_out, Interp_config *interp, unsigned int opcode)
{
int n, m, i, ii, jj, nx_in, ny_in, nx_out, ny_out, tile;
size_t nxgrid, nxgrid2, nxgrid_prev;
size_t nxgrid, nxgrid_prev;
int *i_in=NULL, *j_in=NULL, *i_out=NULL, *j_out=NULL;
int *tmp_t_in=NULL, *tmp_i_in=NULL, *tmp_j_in=NULL, *tmp_i_out=NULL, *tmp_j_out=NULL;
double *xgrid_area=NULL, *tmp_area=NULL, *xgrid_clon=NULL, *xgrid_clat=NULL;
int mxxgrid;
int zero=0;
double *xgrid_area=NULL, *xgrid_clon=NULL, *xgrid_clat=NULL;
int mxxgrid, zero=0;

typedef struct{
double *area;
Expand Down Expand Up @@ -82,13 +81,40 @@ void setup_conserve_interp(int ntiles_in, const Grid_config *grid_in, int ntiles
cell_in[m].clat[n] = 0;
}
}
//START NTILES_OUT
for(n=0; n<ntiles_out; n++) {
nx_out = grid_out[n].nxc;
ny_out = grid_out[n].nyc;

double *lon_out_min_list=NULL, *lon_out_max_list=NULL, *lon_out_avg=NULL;
double *lat_out_min_list=NULL, *lat_out_max_list=NULL;
double *lon_out_list=NULL, *lat_out_list=NULL;
int *n2_list=NULL;

nx_out = grid_out[n].nxc;
ny_out = grid_out[n].nyc;
interp[n].nxgrid = 0;

#pragma acc enter data copyin(grid_out[n].lonc[0:(nx_out+1)*(ny_out+1)], \
grid_out[n].latc[0:(nx_out+1)*(ny_out+1)])

//allocate memory for the lists
malloc_minmaxavg_lists(nx_out*ny_out, &lon_out_min_list, &lon_out_max_list,
&lat_out_min_list, &lat_out_max_list, &n2_list,
&lon_out_avg, &lon_out_list, &lat_out_list);

#define MAX_V 8
#pragma acc enter data create(lon_out_list[0:MAX_V*nx_out*ny_out], lat_out_list[0:MAX_V*nx_out*ny_out], \
lat_out_min_list[0:nx_out*ny_out], lat_out_max_list[0:nx_out*ny_out], \
lon_out_min_list[0:nx_out*ny_out], lon_out_max_list[0:nx_out*ny_out], \
lon_out_avg[0:nx_out*ny_out], n2_list[0:nx_out*ny_out])

//compute the list values
get_minmaxavg_lists(nx_out, ny_out, grid_out[n].lonc, grid_out[n].latc,
lon_out_min_list, lon_out_max_list, lat_out_min_list, lat_out_max_list,
n2_list, lon_out_avg, lon_out_list, lat_out_list);

//START NTILES_IN
for(m=0; m<ntiles_in; m++) {
double *mask;
double y_min, y_max, yy;
double *mask, y_min, y_max, yy;
int jstart, jend, ny_now, j;

nx_in = grid_in[m].nx;
Expand All @@ -97,14 +123,19 @@ void setup_conserve_interp(int ntiles_in, const Grid_config *grid_in, int ntiles
mask = (double *)malloc(nx_in*ny_in*sizeof(double));
for(i=0; i<nx_in*ny_in; i++) mask[i] = 1.0;

mxxgrid=get_maxxgrid();
malloc_xgrid_arrays(mxxgrid, &i_in, &j_in, &i_out, &j_out, &xgrid_area, &xgrid_clon, &xgrid_clat);
//CHECK THIS, might not be from 0
#pragma acc enter data copyin(grid_in[m].latc[0:(nx_in+1)*(ny_in+1)], \
grid_in[m].lonc[0:(nx_in+1)*(ny_in+1)], mask[0:nx_in*ny_in])

if(opcode & GREAT_CIRCLE) {
mxxgrid=get_maxxgrid();
malloc_xgrid_arrays(mxxgrid, &i_in, &j_in, &i_out, &j_out, &xgrid_area, &xgrid_clon, &xgrid_clat);
nxgrid = create_xgrid_great_circle(&nx_in, &ny_in, &nx_out, &ny_out, grid_in[m].lonc,
grid_in[m].latc, grid_out[n].lonc, grid_out[n].latc,
mask, i_in, j_in, i_out, j_out, xgrid_area, xgrid_clon, xgrid_clat);
}
else {

y_min = minval_double((nx_out+1)*(ny_out+1), grid_out[n].latc);
y_max = maxval_double((nx_out+1)*(ny_out+1), grid_out[n].latc);
jstart = ny_in; jend = -1;
Expand All @@ -129,22 +160,41 @@ void setup_conserve_interp(int ntiles_in, const Grid_config *grid_in, int ntiles
grid_in[m].latc+jstart*(nx_in+1), grid_out[n].lonc, grid_out[n].latc,
mask, i_in, j_in, i_out, j_out, xgrid_area);
for(i=0; i<nxgrid; i++) j_in[i] += jstart;
free(mask);
} //opcode CONSERVE_ORDER1

else if(opcode & CONSERVE_ORDER2) {
int g_nxgrid;
int *g_i_in, *g_j_in;
int g_nxgrid, *g_i_in, *g_j_in;
double *g_area, *g_clon, *g_clat;

time_start = clock();

mxxgrid=get_maxxgrid();
malloc_xgrid_arrays(mxxgrid, &i_in, &j_in, &i_out, &j_out, &xgrid_area, &xgrid_clon , &xgrid_clat);

#pragma acc enter data create(xgrid_area[0:mxxgrid], xgrid_clon[0:mxxgrid], xgrid_clat[0:mxxgrid], \
i_in[0:mxxgrid], j_in[0:mxxgrid], i_out[0:mxxgrid],j_out[0:mxxgrid])
#ifdef _OPENACC
nxgrid = create_xgrid_2dx2d_order2_acc(&nx_in, &ny_now, &nx_out, &ny_out, grid_in[m].lonc+jstart*(nx_in+1),
grid_in[m].latc+jstart*(nx_in+1), grid_out[n].lonc, grid_out[n].latc,
lon_out_min_list, lon_out_max_list, lon_out_avg,
lat_out_min_list, lat_out_max_list, n2_list,
lon_out_list, lat_out_list, mask,
i_in, j_in, i_out, j_out, xgrid_area, xgrid_clon, xgrid_clat);
#else
nxgrid = create_xgrid_2dx2d_order2(&nx_in, &ny_now, &nx_out, &ny_out, grid_in[m].lonc+jstart*(nx_in+1),
grid_in[m].latc+jstart*(nx_in+1), grid_out[n].lonc, grid_out[n].latc,
mask, i_in, j_in, i_out, j_out, xgrid_area, xgrid_clon, xgrid_clat);
#endif
if(DEBUG) printf("nxgrid, m, & n is: %d %d %d\n",nxgrid, m, n);
time_end = clock();
time_nxgrid += 1.0 * (time_end - time_start)/CLOCKS_PER_SEC;

free(mask);
#pragma acc exit data delete(mask)
#pragma acc exit data copyout(xgrid_area[0:mxxgrid], xgrid_clon[0:mxxgrid], xgrid_clat[0:mxxgrid], \
i_in[0:mxxgrid], j_in[0:mxxgrid], i_out[0:mxxgrid],j_out[0:mxxgrid])

for(i=0; i<nxgrid; i++) j_in[i] += jstart;

/* For the purpose of bitiwise reproducing, the following operation is needed. */
Expand All @@ -167,18 +217,15 @@ void setup_conserve_interp(int ntiles_in, const Grid_config *grid_in, int ntiles
cell_in[m].clon[ii] += g_clon[i];
cell_in[m].clat[ii] += g_clat[i];
}
free(g_i_in);
free(g_j_in);
free(g_area);
free(g_clon);
free(g_clat);
free(g_i_in); free(g_j_in); free(g_area); free(g_clon); free(g_clat);
} // if g_nxgrid > 0
}
else
mpp_error("conserve_interp: interp_method should be CONSERVE_ORDER1 or CONSERVE_ORDER2");
} //opcode CONSERVE_ORDER2

free(mask);

//for all opcodes
if(nxgrid > 0) {
nxgrid_prev = interp[n].nxgrid;
interp[n].nxgrid += nxgrid;
Expand Down Expand Up @@ -231,8 +278,17 @@ void setup_conserve_interp(int ntiles_in, const Grid_config *grid_in, int ntiles
}
}
} /* if(nxgrid>0) */
malloc_xgrid_arrays(zero, &i_in, &j_in, &i_out, &j_out, &xgrid_area, &xgrid_clon , &xgrid_clat);
#pragma acc exit data delete(grid_in[m].latc, grid_in[m].lonc)
} // ntiles_in
malloc_minmaxavg_lists(zero, &lon_out_min_list, &lon_out_max_list,
&lat_out_min_list, &lat_out_max_list, &n2_list,
&lon_out_avg, &lon_out_list, &lat_out_list);
#pragma acc exit data delete(lon_out_min_list, lon_out_max_list, lat_out_min_list, \
lat_out_max_list, n2_list, lon_out_avg, lon_out_list, lat_out_list)
#pragma acc exit data delete(grid_out[n].latc, grid_out[n].lonc)
} // ntimes_out

if(DEBUG) print_time("time_nxgrid", time_nxgrid);

if(opcode & CONSERVE_ORDER2) {
Expand Down
2 changes: 2 additions & 0 deletions tools/fregrid/conserve_interp_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,7 @@
void read_remap_file( int ntiles_in, int ntiles_out, Grid_config *grid_out,
Interp_config *interp, unsigned int opcode);

void malloc_xgrid_arrays( int nsize, int **i_in, int **j_in, int **i_out, int **j_out,
double **xgrid_area, double **xgrid_clon, double **xgrid_clat );

#endif
2 changes: 2 additions & 0 deletions tools/libfrencutils/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ libfrencutils_a_SOURCES = affinity.c \
create_xgrid.h \
create_xgrid_util.c \
create_xgrid_util.h \
create_xgrid_acc.c \
create_xgrid_acc.h \
gradient_c2l.c \
gradient_c2l.h \
interp.c \
Expand Down
160 changes: 160 additions & 0 deletions tools/libfrencutils/create_xgrid_acc.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
/***********************************************************************
* GNU Lesser General Public License
*
* This file is part of the GFDL FRE NetCDF tools package (FRE-NCTools).
*
* FRE-NCtools is free software: you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or (at
* your option) any later version.
*
* FRE-NCtools is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FRE-NCTools. If not, see
* <http://www.gnu.org/licenses/>.
**********************************************************************/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "mosaic_util.h"
#include "create_xgrid.h"
#include "create_xgrid_util.h"
#include "create_xgrid_acc.h"
#include "constant.h"

#define AREA_RATIO_THRESH (1.e-6)
#define MASK_THRESH (0.5)
#define EPSLN8 (1.e-8)
#define EPSLN30 (1.0e-30)
#define EPSLN10 (1.0e-10)

/*******************************************************************************
create_xgrid_2dx2d_order2 OPENACC version
*******************************************************************************/
int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
const double *lon_out_min_list, const double *lon_out_max_list, const double *lon_out_avg,
const double *lat_out_min_list, const double *lat_out_max_list, const int *n2_list,
const double *lon_out_list, const double *lat_out_list,
const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out,
double *xgrid_area, double *xgrid_clon, double *xgrid_clat)
{

#define MAX_V 8
int nx1, nx2, ny1, ny2, nx1p, nx2p, nxgrid;
double *area_in, *area_out;
int ij, i1, j1;
int mxxgrid;
int n=0;

nx1 = *nlon_in;
ny1 = *nlat_in;
nx2 = *nlon_out;
ny2 = *nlat_out;
nx1p = nx1 + 1;
nx2p = nx2 + 1;
mxxgrid = get_maxxgrid();

area_in = (double *)malloc(nx1*ny1*sizeof(double));
area_out = (double *)malloc(nx2*ny2*sizeof(double));
get_grid_area(nlon_in, nlat_in, lon_in, lat_in, area_in);
get_grid_area(nlon_out, nlat_out, lon_out, lat_out, area_out);

nxgrid = 0;

#pragma acc data present(lon_out[0:(nx2+1)*(ny2+1)], lat_out[0:(nx2+1)*(ny2+1)])
#pragma acc data present(lon_in[0:(nx1+1)*(ny1+1)], lat_in[0:(nx1+1)*(ny1+1)], mask_in[0:nx1*ny1])
#pragma acc data present(lon_out_list[0:MAX_V*nx2*ny2], lat_out_list[0:MAX_V*nx2*ny2], \
lat_out_min_list[0:nx2*ny2], lat_out_max_list[0:nx2*ny2], \
lon_out_min_list[0:nx2*ny2], lon_out_max_list[0:nx2*ny2], \
lon_out_avg[0:nx2*ny2], n2_list[0:nx2*ny2])
#pragma acc data present(xgrid_area[0:mxxgrid], xgrid_clon[0:mxxgrid], xgrid_clat[0:mxxgrid], \
i_in[0:mxxgrid], j_in[0:mxxgrid], i_out[0:mxxgrid],j_out[0:mxxgrid])
#pragma acc data copyin(area_in[0:nx1*ny1], area_out[0:nx2*ny2])
#pragma acc kernels copy(nxgrid)
{
#pragma acc loop independent collapse(2) //reduction(+:nxgrid)
for(j1=0; j1<ny1; j1++) for(i1=0; i1<nx1; i1++) if( mask_in[j1*nx1+i1] > MASK_THRESH ) {
int n0, n1, n2, n3, n1_in;
double lat_in_min,lat_in_max,lon_in_min,lon_in_max,lon_in_avg;
double x1_in[MV], y1_in[MV];
n0 = j1*nx1p+i1; n1 = j1*nx1p+i1+1;
n2 = (j1+1)*nx1p+i1+1; n3 = (j1+1)*nx1p+i1;
x1_in[0] = lon_in[n0]; y1_in[0] = lat_in[n0];
x1_in[1] = lon_in[n1]; y1_in[1] = lat_in[n1];
x1_in[2] = lon_in[n2]; y1_in[2] = lat_in[n2];
x1_in[3] = lon_in[n3]; y1_in[3] = lat_in[n3];
lat_in_min = minval_double(4, y1_in);
lat_in_max = maxval_double(4, y1_in);
n1_in = fix_lon(x1_in, y1_in, 4, M_PI);
lon_in_min = minval_double(n1_in, x1_in);
lon_in_max = maxval_double(n1_in, x1_in);
lon_in_avg = avgval_double(n1_in, x1_in);
#pragma acc loop independent //reduction(+:nxgrid)
for(ij=0; ij<nx2*ny2; ij++) {
int n_out, i2, j2, n2_in, l;
double xarea, dx, lon_out_min, lon_out_max;
double x2_in[MAX_V], y2_in[MAX_V], x_out[MV], y_out[MV];;

i2 = ij%nx2;
j2 = ij/nx2;

if(lat_out_min_list[ij] >= lat_in_max || lat_out_max_list[ij] <= lat_in_min ) continue;
/* adjust x2_in according to lon_in_avg*/
n2_in = n2_list[ij];
#pragma acc loop seq
for(l=0; l<n2_in; l++) {
x2_in[l] = lon_out_list[ij*MAX_V+l];
y2_in[l] = lat_out_list[ij*MAX_V+l];
}
lon_out_min = lon_out_min_list[ij];
lon_out_max = lon_out_max_list[ij];
dx = lon_out_avg[ij] - lon_in_avg;
if(dx < -M_PI ) {
lon_out_min += TPI;
lon_out_max += TPI;
#pragma acc loop seq
for (l=0; l<n2_in; l++) x2_in[l] += TPI;
}
else if (dx > M_PI) {
lon_out_min -= TPI;
lon_out_max -= TPI;
#pragma acc loop seq
for (l=0; l<n2_in; l++) x2_in[l] -= TPI;
}

/* x2_in should in the same range as x1_in after lon_fix, so no need to
consider cyclic condition
*/
if(lon_out_min >= lon_in_max || lon_out_max <= lon_in_min ) continue;
n_out = 1;
if ( (n_out = clip_2dx2d( x1_in, y1_in, n1_in, x2_in, y2_in, n2_in, x_out, y_out )) > 0) {
double min_area;
xarea = poly_area (x_out, y_out, n_out ) * mask_in[j1*nx1+i1];
min_area = min(area_in[j1*nx1+i1], area_out[j2*nx2+i2]);
if( xarea/min_area > AREA_RATIO_THRESH ) {
xgrid_area[nxgrid] = xarea;
xgrid_clon[nxgrid] = poly_ctrlon(x_out, y_out, n_out, lon_in_avg);
xgrid_clat[nxgrid] = poly_ctrlat (x_out, y_out, n_out );
i_in[nxgrid] = i1;
j_in[nxgrid] = j1;
i_out[nxgrid] = i2;
j_out[nxgrid] = j2;
#pragma atomic update
nxgrid++;
}
}
}
}
}

free(area_in);
free(area_out);

return nxgrid;

};/* get_xgrid_2Dx2D_order2 */
30 changes: 30 additions & 0 deletions tools/libfrencutils/create_xgrid_acc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/***********************************************************************
* GNU Lesser General Public License
*
* This file is part of the GFDL FRE NetCDF tools package (FRE-NCTools).
*
* FRE-NCtools is free software: you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or (at
* your option) any later version.
*
* FRE-NCtools is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FRE-NCTools. If not, see
* <http://www.gnu.org/licenses/>.
**********************************************************************/
#ifndef CREATE_XGRID_VER_H_
#define CREATE_XGRID_VER_H_

int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out,
const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out,
const double *lon_out_min_list, const double *lon_out_max_list, const double *lon_out_avg,
const double *lat_out_min_list, const double *lat_out_max_list, const int *n2_list,
const double *lon_out_list, const double *lat_out_list,
const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out,
double *xgrid_area, double *xgrid_clon, double *xgrid_clat);
#endif
Loading
Loading