diff --git a/Makefile.am b/Makefile.am index 1dfb38fa..ba3157fe 100644 --- a/Makefile.am +++ b/Makefile.am @@ -59,4 +59,5 @@ endif if ENABLE_ACC SUBDIRS += tools/libfrencutils_acc SUBDIRS += tools/fregrid_acc +SUBDIRS += t_acc endif diff --git a/configure.ac b/configure.ac index 8ac7ddd3..71b999cc 100644 --- a/configure.ac +++ b/configure.ac @@ -214,12 +214,18 @@ AC_CONFIG_FILES([Makefile tools/river_regrid/Makefile tools/runoff_regrid/Makefile tools/transfer_to_mosaic_grid/Makefile - t/Makefile tools/simple_hydrog/Makefile tools/simple_hydrog/postp/Makefile tools/simple_hydrog/lakes/Makefile tools/simple_hydrog/rmvpr/Makefile tools/simple_hydrog/libfmslite/Makefile + t/Makefile + t_acc/Makefile + t_acc/test_read_remap_file/Makefile + t_acc/test_get_grid_cell_struct/Makefile + t_acc/test_get_upbound_nxcells_2dx2d/Makefile + t_acc/test_get_interp_order1/Makefile + ]) AC_OUTPUT diff --git a/t_acc/Makefile.am b/t_acc/Makefile.am new file mode 100644 index 00000000..fb07c844 --- /dev/null +++ b/t_acc/Makefile.am @@ -0,0 +1,21 @@ +#*********************************************************************** +#* GNU Lesser General Public License +#* +#* This file is part of the GFDL Flexible Modeling System (FMS). +#* +#* FMS is free software: you can redistribute it and/or modify it under +#* the terms of the GNU Lesser General Public License as published by +#* the Free Software Foundation, either version 3 of the License, or (at +#* your option) any later version. +#* +#* FMS is distributed in the hope that it will be useful, but WITHOUT +#* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +#* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +#* for more details. +#* +#* You should have received a copy of the GNU Lesser General Public +#* License along with FMS. If not, see . +#*********************************************************************** + +SUBDIRS = test_read_remap_file test_get_grid_cell_struct test_get_upbound_nxcells_2dx2d\ + test_get_interp_order1 diff --git a/t_acc/test_get_grid_cell_struct/Makefile.am b/t_acc/test_get_grid_cell_struct/Makefile.am new file mode 100644 index 00000000..0c368d47 --- /dev/null +++ b/t_acc/test_get_grid_cell_struct/Makefile.am @@ -0,0 +1,34 @@ +#*********************************************************************** +#* GNU Lesser General Public License +#* +#* This file is part of the GFDL Flexible Modeling System (FMS). +#* +#* FMS is free software: you can redistribute it and/or modify it under +#* the terms of the GNU Lesser General Public License as published by +#* the Free Software Foundation, either version 3 of the License, or (at +#* your option) any later version. +#* +#* FMS is distributed in the hope that it will be useful, but WITHOUT +#* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +#* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +#* for more details. +#* +#* You should have received a copy of the GNU Lesser General Public +#* License along with FMS. If not, see . +#*********************************************************************** + +check_PROGRAMS = test_get_grid_cell_struct + +AM_CFLAGS = $(NETCDF_CFLAGS) \ + -I$(top_srcdir)/tools/fregrid_acc \ + -I$(top_srcdir)/tools/libfrencutils \ + -I$(top_srcdir)/tools/libfrencutils_acc -acc + +LDADD = $(NETCDF_LDFLAGS) $(NETCDF_LIBS) $(RPATH_FLAGS) \ + $(top_builddir)/tools/fregrid_acc/interp_utils_acc.o \ + $(top_builddir)/tools/libfrencutils/libfrencutils.a \ + $(top_builddir)/tools/libfrencutils_acc/libfrencutils_acc.a + +test_get_grid_cell_struct_SOURCES = test_get_grid_cell_struct.c + +TESTS = test_get_grid_cell_struct diff --git a/t_acc/test_get_grid_cell_struct/test_get_grid_cell_struct.c b/t_acc/test_get_grid_cell_struct/test_get_grid_cell_struct.c new file mode 100644 index 00000000..927bf13e --- /dev/null +++ b/t_acc/test_get_grid_cell_struct/test_get_grid_cell_struct.c @@ -0,0 +1,222 @@ +/*********************************************************************** + * GNU Lesser General Public License + * + * This file is part of the GFDL FRE NetCDF tools package (FRE-NCTools). + * + * FRE-NCtools is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or (at + * your option) any loner version. + * + * FRE-NCtools is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FRE-NCTools. If not, see + * . + **********************************************************************/ + +// This test tests the function test_get_grid_cell_struct used in fregrid_acc. +// Properties of each grid cell in a smple made-up grid with no poles are computed +// on the device. This test ensures that the data transfer between the host and device +// and computations have executed on the device as expected. + +#include +#include +#include +#include "create_xgrid_utils_acc.h" +#include "interp_utils_acc.h" +#include "parameters.h" +#include "globals_acc.h" + +#define NLON 36 // 36 cells in lon direction (36+1 grid points in the lon direction for each lat point) +#define NLAT 4 // 4 cells in lat direction ( 4+1 grid points in the lat direction for each lon point) + +double dlon = 360.0/NLON; +double dlat = 30.0; + +double const tolerance = 1.e-7; + +typedef struct { + double lon_min[NLON*NLAT]; + double lon_max[NLON*NLAT]; + double lat_min[NLON*NLAT]; + double lat_max[NLON*NLAT]; + double lon_cent[NLON*NLAT]; + double *lon_vertices[NLON*NLAT]; + double *lat_vertices[NLON*NLAT]; +} Answers; + +void copy_cell_to_host(Grid_cells_struct_config *grid_cells); +void reset_cell_on_host(Grid_cells_struct_config *grid_cells); +void get_answers(const double *lon, const double *lat, Answers *answers); +void check_answers( const Grid_cells_struct_config *grid_cells, const Answers *answers); +void check_ranswer( const int n, const double *answer, const double *checkme); + +//TODO: test for compute poly_area + +int main(){ + + Answers answers; + Grid_cells_struct_config grid_cells; + Grid_config grid; + + grid.nxc=NLON; + grid.nyc=NLAT; + grid.lonc = (double *)calloc( (NLON+1)*(NLAT+1), sizeof(double) ); + grid.latc = (double *)calloc( (NLON+1)*(NLAT+1), sizeof(double) ); + + // set grid; no poles, do not need to worry about fix_lon + for(int ilat=0 ; ilatlon_vertices[icell] = (double *)calloc(MAX_V, sizeof(double)); + answers->lat_vertices[icell] = (double *)calloc(MAX_V, sizeof(double)); + } + + //get min max avg replaceme lon values of a cell + for(int ilat=0 ; ilatlat_min[icell] = latitude_min; + answers->lat_max[icell] = latitude_max; + answers->lon_min[icell] = (0 + dlon*ilon)* D2R; + answers->lon_max[icell] = (0 + dlon*(ilon+1))* D2R; + answers->lon_cent[icell] = (dlon*ilon+0.5*dlon)* D2R; + } + } + + //get vertices for each cell + for(int ilat=0 ; ilatlat_vertices[icell][0]=answers->lat_min[icell]; + answers->lat_vertices[icell][1]=answers->lat_min[icell]; + answers->lat_vertices[icell][2]=answers->lat_max[icell]; + answers->lat_vertices[icell][3]=answers->lat_max[icell]; + + answers->lon_vertices[icell][0]=answers->lon_min[icell]; + answers->lon_vertices[icell][1]=answers->lon_max[icell]; + answers->lon_vertices[icell][2]=answers->lon_max[icell]; + answers->lon_vertices[icell][3]=answers->lon_min[icell]; + } + } + +} + +void reset_cell_on_host(Grid_cells_struct_config *grid_cells) +{ + + for( int icell=0 ; icelllon_min[icell]=-99.; + grid_cells->lon_max[icell]=-99.; + grid_cells->lat_min[icell]=-99.; + grid_cells->lat_max[icell]=-99.; + grid_cells->lon_cent[icell]=-99.; + grid_cells->nvertices[icell]=-99; + for(int ipt=0 ; iptlon_vertices[icell][ipt]=-99.; + grid_cells->lat_vertices[icell][ipt]=-99.; + } + } + +} + +void copy_cell_to_host(Grid_cells_struct_config *grid_cells) +{ + + int ncell = NLON*NLAT; +#pragma acc exit data copyout( grid_cells->lon_min[:ncell], grid_cells->lon_max[:ncell], \ + grid_cells->lat_min[:ncell], grid_cells->lat_max[:ncell], \ + grid_cells->lon_cent[:ncell], grid_cells->nvertices[:ncell], \ + grid_cells->lon_vertices[:ncell][:MAX_V],\ + grid_cells->lat_vertices[:ncell][:MAX_V]) +} + +void check_answers( const Grid_cells_struct_config *grid_cells, const Answers *answers) +{ + + printf("Checking for nubmer of cell vertices\n"); + for(int i=0 ; invertices[i] != 4 ) { + printf("ERROR element %d: %d vs %d\n", i, grid_cells->nvertices[i], 4); + exit(1); + } + } + + printf("Checking for min longitudinal point for each cell\n"); + check_ranswer(NLON*NLAT, answers->lon_min, grid_cells->lon_min); + + printf("Checking for max longitudinal point for each cell\n"); + check_ranswer(NLON*NLAT, answers->lon_max, grid_cells->lon_max); + + printf("Checking for min latitudinal point for each cell\n"); + check_ranswer(NLON*NLAT, answers->lat_min, grid_cells->lat_min); + + printf("Checking for max latitudinal point for each cell\n"); + check_ranswer(NLON*NLAT, answers->lat_max, grid_cells->lat_max); + + printf("Checking for avg longitudinal point for each cell\n"); + check_ranswer(NLON*NLAT, answers->lon_cent, grid_cells->lon_cent); + + printf("Checking for longitudinal vertices for each cell\n"); + for(int icell=0 ; icelllon_vertices[icell], grid_cells->lon_vertices[icell]); + } + + printf("Checking for latitudinal vertices for each cell\n"); + for(int icell=0 ; icelllat_vertices[icell], grid_cells->lat_vertices[icell]); + } + + +} + +void check_ranswer( const int n, const double *answers, const double *checkme) { + for(int i=0 ; itolerance ) { + printf(" ERROR element %d: %lf vs %lf, difference of %e\n", + i, answers[i], checkme[i], abs(answers[i]-checkme[i])); + exit(1); + } + } +} diff --git a/t_acc/test_get_interp_order1/Makefile.am b/t_acc/test_get_interp_order1/Makefile.am new file mode 100644 index 00000000..88b0f3e5 --- /dev/null +++ b/t_acc/test_get_interp_order1/Makefile.am @@ -0,0 +1,35 @@ +#*********************************************************************** +#* GNU Lesser General Public License +#* +#* This file is part of the GFDL Flexible Modeling System (FMS). +#* +#* FMS is free software: you can redistribute it and/or modify it under +#* the terms of the GNU Lesser General Public License as published by +#* the Free Software Foundation, either version 3 of the License, or (at +#* your option) any later version. +#* +#* FMS is distributed in the hope that it will be useful, but WITHOUT +#* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +#* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +#* for more details. +#* +#* You should have received a copy of the GNU Lesser General Public +#* License along with FMS. If not, see . +#*********************************************************************** + +check_PROGRAMS = test_get_interp_order1 + +AM_CFLAGS = $(NETCDF_CFLAGS) \ + -I$(top_srcdir)/tools/fregrid_acc \ + -I$(top_srcdir)/tools/libfrencutils \ + -I$(top_srcdir)/tools/libfrencutils_acc -acc + +LDADD = $(NETCDF_LDFLAGS) $(NETCDF_LIBS) $(RPATH_FLAGS) \ + $(top_builddir)/tools/fregrid_acc/interp_utils_acc.o \ + $(top_builddir)/tools/fregrid_acc/conserve_interp_acc.o \ + $(top_builddir)/tools/libfrencutils/libfrencutils.a \ + $(top_builddir)/tools/libfrencutils_acc/libfrencutils_acc.a + +test_get_interp_order1_SOURCES = test_get_interp_order1.c + +TESTS = test_get_interp_order1 diff --git a/t_acc/test_get_interp_order1/test_get_interp_order1.c b/t_acc/test_get_interp_order1/test_get_interp_order1.c new file mode 100644 index 00000000..05278205 --- /dev/null +++ b/t_acc/test_get_interp_order1/test_get_interp_order1.c @@ -0,0 +1,206 @@ +/*********************************************************************** + * GNU Lesser General Public License + * + * This file is part of the GFDL FRE NetCDF tools package (FRE-NCTools). + * + * FRE-NCtools is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or (at + * your option) any later version. + * + * FRE-NCtools is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FRE-NCTools. If not, see + * . + **********************************************************************/ + +// This test tests the function create_xgrid_2dx2d_order2_acc for a simple +// case where the input grid is identical to the output grid. The exchange +// grid is identical to the input/output grid. Only the parent indices corresponding +// to each exchange cell are checked. + +#include +#include +#include +#include "globals_acc.h" +#include "interp_utils_acc.h" +#include "conserve_interp_acc.h" +#include "create_xgrid_utils_acc.h" +#include "create_xgrid_acc.h" + +void generate_input_is_same_as_output_grids(Grid_config *input_grid, Grid_config *output_grid, + Interp_per_input_tile *interp_answers); +int run_tests(Grid_config *input_grid, Grid_config *output_grid, + Interp_per_input_tile *interp); +void check_answers(const Interp_per_input_tile *interp_answers, const Interp_per_input_tile *interp); +void check_ianswers(const int n, const int *answers, const int *checkme); + +//TODO: add more complicated tests +int main() +{ + + Grid_config input_grid, output_grid; + Interp_per_input_tile interp_answers, interp; + + printf("CHECKING CASE 1: Input grid = Output_grid\n"); + + // get input_grid and output_grid + generate_input_is_same_as_output_grids(&input_grid, &output_grid, &interp_answers); + + // run create_xgrid order 1 + run_tests(&input_grid, &output_grid, &interp); + + // check answers + check_answers(&interp_answers, &interp); + + //test successful + return 0; + +} + +void generate_input_is_same_as_output_grids(Grid_config *input_grid, Grid_config *output_grid, + Interp_per_input_tile *interp_answers) +{ + + int nlon_cells = 360-1; //[ 0, 1, ... 360] + int nlat_cells = 3-1; //[ -30, 0, 30] + int ncells = nlon_cells * nlat_cells; + int ngridpts = (nlon_cells+1) * (nlat_cells+1); + double dlat = 30.0; + double dlon = 1.0; + + int i=0; + + input_grid->nxc = nlon_cells ; input_grid->nyc = nlat_cells; + output_grid->nxc = nlon_cells ; output_grid->nyc = nlat_cells; + + input_grid->latc = (double *)malloc(ngridpts*sizeof(double)); + input_grid->lonc = (double *)malloc(ngridpts*sizeof(double)); + output_grid->latc = (double *)malloc(ngridpts*sizeof(double)); + output_grid->lonc = (double *)malloc(ngridpts*sizeof(double)); + + for( int ilat=0 ; ilatlatc[i] = latitude; + output_grid->latc[i] = latitude; + input_grid->lonc[i] = (ilon * dlon)*D2R; + output_grid->lonc[i] = (ilon * dlon)*D2R; + i++; + } + } + + //answers + interp_answers->nxcells = ncells; + interp_answers->input_parent_cell_index = (int *)malloc(ncells*sizeof(int)); + interp_answers->output_parent_cell_index = (int *)malloc(ncells*sizeof(int)); + //interp_answers + for(int ij1=0 ; ij1input_parent_cell_index[ij1] = ij1; + interp_answers->output_parent_cell_index[ij1] = ij1; + } + +} + + +void check_answers(const Interp_per_input_tile *interp_answers, const Interp_per_input_tile *interp) +{ + + //reset interp on host to reassure device data is correct + for(int i=0 ; inxcells; i++) { + interp->input_parent_cell_index[i] = -99; + interp->output_parent_cell_index[i] = -99; + } + +#pragma acc update host( interp->nxcells ) + int nxcells = interp->nxcells; + +#pragma acc exit data copyout( interp->input_parent_cell_index[:nxcells],\ + interp->output_parent_cell_index[:nxcells]) + + printf("checking nxcells\n"); + check_ianswers(1, &(interp_answers->nxcells), &interp->nxcells); + + printf("checking input_parent_cell_index\n"); + check_ianswers(nxcells, interp_answers->input_parent_cell_index, interp->input_parent_cell_index); + + printf("checking output_parent_cell_index\n"); + check_ianswers(nxcells, interp_answers->output_parent_cell_index, interp->output_parent_cell_index); + +} + + +int run_tests(Grid_config *input_grid, Grid_config *output_grid, Interp_per_input_tile *interp) +{ + + int nlon_input_cells = input_grid->nxc; + int nlat_input_cells = input_grid->nyc; + int nlon_output_cells = output_grid->nxc; + int nlat_output_cells = output_grid->nyc; + int ncells_input = nlon_input_cells * nlat_input_cells; + int ncells_output = nlon_output_cells * nlat_output_cells; + int ngridpts_input = (nlon_input_cells+1)*(nlat_input_cells+1); + int ngridpts_output = (nlon_output_cells+1)*(nlat_output_cells+1); + + int upbound_nxcells, nxcells, jlat_overlap_starts, jlat_overlap_ends; + int *approx_nxcells_per_ij1, *ij2_start, *ij2_end; + double *input_grid_mask; + Grid_cells_struct_config output_grid_cells; + + //copy grid to device + copy_grid_to_device_acc(ngridpts_input, input_grid->latc, input_grid->lonc); + copy_grid_to_device_acc(ngridpts_output, output_grid->latc, output_grid->lonc); + + //get mask to skip input cells in creating interp + get_input_grid_mask_acc(ncells_input, &input_grid_mask); + + //get output grid cell info + get_grid_cell_struct_acc(nlon_output_cells, nlat_output_cells, output_grid, &output_grid_cells); + + //get bounding index + get_bounding_indices_acc(nlon_output_cells, nlat_output_cells, nlon_input_cells, nlat_input_cells, + output_grid->latc, input_grid->latc, &jlat_overlap_starts, &jlat_overlap_ends); + + //malloc and create arrays + create_upbound_nxcells_arrays_on_device_acc(ncells_input, &approx_nxcells_per_ij1, &ij2_start, &ij2_end); + + upbound_nxcells = get_upbound_nxcells_2dx2d_acc(input_grid->nxc, input_grid->nyc, output_grid->nxc, output_grid->nyc, + jlat_overlap_starts, jlat_overlap_ends, + input_grid->lonc, input_grid->latc, + output_grid->lonc, output_grid->latc, + input_grid_mask, &output_grid_cells, + approx_nxcells_per_ij1, ij2_start, ij2_end); + + nxcells = create_xgrid_2dx2d_order1_acc( nlon_input_cells, nlat_input_cells, + nlon_output_cells, nlat_output_cells, + jlat_overlap_starts, jlat_overlap_ends, + input_grid->lonc, input_grid->latc, + output_grid->lonc, output_grid->latc, + upbound_nxcells, input_grid_mask, &output_grid_cells, + approx_nxcells_per_ij1, ij2_start, ij2_end, + interp); + + free_grid_cell_struct_acc(ncells_output, &output_grid_cells); + free_upbound_nxcells_arrays_acc(ncells_input, &approx_nxcells_per_ij1, &ij2_start, &ij2_end); + free_input_grid_mask_acc(ncells_input, &input_grid_mask); + + return 0; + +} + + +void check_ianswers( const int n, const int *answers, const int *checkme) +{ + + for(int i=0 ; i. +#*********************************************************************** + +check_PROGRAMS = test_get_upbound_nxcells_2dx2d + +AM_CFLAGS = $(NETCDF_CFLAGS) \ + -I$(top_srcdir)/tools/fregrid_acc \ + -I$(top_srcdir)/tools/libfrencutils \ + -I$(top_srcdir)/tools/libfrencutils_acc -acc + +LDADD = $(NETCDF_LDFLAGS) $(NETCDF_LIBS) $(RPATH_FLAGS) \ + $(top_builddir)/tools/fregrid_acc/conserve_interp_acc.o \ + $(top_builddir)/tools/fregrid_acc/interp_utils_acc.o \ + $(top_builddir)/tools/libfrencutils/libfrencutils.a \ + $(top_builddir)/tools/libfrencutils_acc/libfrencutils_acc.a + +test_get_upbound_nxcells_2dx2d_SOURCES = test_get_upbound_nxcells_2dx2d.c + +TESTS = test_get_upbound_nxcells_2dx2d diff --git a/t_acc/test_get_upbound_nxcells_2dx2d/test_get_upbound_nxcells_2dx2d.c b/t_acc/test_get_upbound_nxcells_2dx2d/test_get_upbound_nxcells_2dx2d.c new file mode 100644 index 00000000..972e1921 --- /dev/null +++ b/t_acc/test_get_upbound_nxcells_2dx2d/test_get_upbound_nxcells_2dx2d.c @@ -0,0 +1,390 @@ +/*********************************************************************** + * GNU Lesser General Public License + * + * This file is part of the GFDL FRE NetCDF tools package (FRE-NCTools). + * + * FRE-NCtools is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or (at + * your option) any later version. + * + * FRE-NCtools is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FRE-NCTools. If not, see + * . + **********************************************************************/ + +// This test tests the function get_upbound_nxcells_2dx2d that is called +// before create_xgrid_2dx2d_order1 create_xgrid_2dx2d_order2. This function +// computes the upper bound value of nxcells (number of exchange grid cells) +// and the bounding parent cell indices for each each exchange grid cell. +// The test checks to ensure data transferred successfully between the host and device +// and all computations have occured as expected on the device. + +#include +#include +#include +#include "globals_acc.h" +#include "conserve_interp_acc.h" +#include "interp_utils_acc.h" +#include "create_xgrid_utils_acc.h" +#include "create_xgrid_acc.h" + +typedef struct { + int ncells_input; + int *approx_nxcells_per_ij1; + int *ij2_start; + int *ij2_end; + int upbound_nxcells; +} Answers; + +void generate_input_is_same_as_output_grids(Grid_config *input_grid, Grid_config *output_grid, Answers *answers); +void generate_two_ouput_cells_per_input_cell(Grid_config *input_grid, Grid_config *output_grid, Answers *answers); +void check_data_on_device(Grid_config *input_grid, Grid_config *output_grid, + int *approx_nxcells_per_ij1, int *ij2_start, int *ij2_end, + Grid_cells_struct_config *output_grid_cells); +int run_tests(Grid_config *input_grid, Grid_config *output_grid, Grid_cells_struct_config *output_grid_cells, + int **approx_nxcells_per_ij1, int **ij2_start, int **ij2_end, int *upbound_nxcells); +void cleanup_test(Answers *answers, Grid_config *input_grid, Grid_config *output_grid, + Grid_cells_struct_config *output_grid_cells, int *approx_nxcells_per_ij1, int *ij2_start, int *ij2_end); +void check_answers(const Answers *answers, int *approx_nxcells_per_ij1, + int *ij2_start, int *ij2_end, const int upbound_nxcells); +void check_ianswers(const int n, const int *answers, const int *checkme); + +//TODO: add more complicated tests + +int main() +{ + + Grid_config input_grid, output_grid; + Grid_cells_struct_config output_grid_cells; + int upbound_nxcells; + int *approx_nxcells_per_ij1, *ij2_start, *ij2_end; + + Answers answers; + + printf("CHECKING CASE 1: Input grid = Output_grid\n"); + + // generate grids + generate_input_is_same_as_output_grids(&input_grid, &output_grid, &answers); + + //run get_upbound_nxcells_2dx2d + run_tests(&input_grid, &output_grid, &output_grid_cells, &approx_nxcells_per_ij1, &ij2_start, &ij2_end, + &upbound_nxcells); + + // check answers + check_answers(&answers, approx_nxcells_per_ij1, ij2_start, ij2_end, upbound_nxcells); + + //cleanup + cleanup_test(&answers, &input_grid, &output_grid, &output_grid_cells, approx_nxcells_per_ij1, + ij2_start, ij2_end); + + // test is a success + return 0; + +} + +void generate_input_is_same_as_output_grids(Grid_config *input_grid, Grid_config *output_grid, Answers *answers) +{ + + int nlon_cells = 360-1; //[ 0, 1, ... 360] + int nlat_cells = 3-1; //[ -30, 0, 30] + int ncells = nlon_cells * nlat_cells; + int ngridpts = (nlon_cells+1) * (nlat_cells+1); + double dlat = 30.0; + double dlon = 1.0; + + int i=0; + + input_grid->nxc = nlon_cells ; input_grid->nyc = nlat_cells; + output_grid->nxc = nlon_cells ; output_grid->nyc = nlat_cells; + + input_grid->latc = (double *)malloc(ngridpts*sizeof(double)); + input_grid->lonc = (double *)malloc(ngridpts*sizeof(double)); + output_grid->latc = (double *)malloc(ngridpts*sizeof(double)); + output_grid->lonc = (double *)malloc(ngridpts*sizeof(double)); + + for( int ilat=0 ; ilatlatc[i] = latitude; + output_grid->latc[i] = latitude; + input_grid->lonc[i] = (ilon * dlon)*D2R; + output_grid->lonc[i] = (ilon * dlon)*D2R; + i++; + } + } + + //answers + answers->ncells_input = ncells; + answers->upbound_nxcells = ncells; + answers->approx_nxcells_per_ij1 = (int *)malloc(ncells*sizeof(int)); + answers->ij2_start = (int *)malloc(ncells*sizeof(int)); + answers->ij2_end = (int *)malloc(ncells*sizeof(int)); + for( int ij1=0 ; ij1approx_nxcells_per_ij1[ij1] = 1; + answers->ij2_start[ij1] = ij1; + answers->ij2_end[ij1] = ij1; + } + +} + +/* +void generate_two_ouput_cells_per_input_cell(Grid_config *input_grid, Grid_config *output_grid, Answers *answers) +{ + + int nlat_input_cells = 2; //[0, 30, 60] + int nlon_input_cells = 180; //[0, 1, 2...180] + + int nlat_output_cells = 2*nlat_input_cells ; //[0, 15, 30, 45, 60] + int nlon_output_cells = 2*nlon_input_cells ; //[0, 0.5, 1..] + + int ncells_input = nlon_input_cells * nlat_input_cells; + int ncells_output = nlon_output_cells * nlat_output_cells; + int ngridpts_input = (nlon_input_cells+1)*(nlat_input_cells+1); + int ngridpts_output = (nlon_output_cells+1)*(nlat_output_cells+1); + + double dlat_input = 30.0; + double dlon_input = 1.0; + double dlat_output = 15.0; + double dlon_output = 0.5; + + int i=0; + + input_grid->nxc = nlon_input_cells ; input_grid->nyc = nlat_input_cells; + output_grid->nxc = nlon_output_cells ; output_grid->nyc = nlat_output_cells; + + input_grid->latc = (double *)malloc(ngridpts_input*sizeof(double)); + input_grid->lonc = (double *)malloc(ngridpts_input*sizeof(double)); + output_grid->latc = (double *)malloc(ngridpts_output*sizeof(double)); + output_grid->lonc = (double *)malloc(ngridpts_output*sizeof(double)); + + //input grid + for( int ilat=0 ; ilatlatc[ilat] = latitude; + input_grid->lonc[ilat] = (ilon * dlon_input)*D2R ; + } + } + + for(int ilat=0 ; ilatlatc[ilat] = latitude; + output_grid->lonc[ilat] = (ilon *dlon_output) * D2R; + } + } + + //answers + answers->ncells_input = ncells_input; + answers->upbound_nxcells = ncells_output; + answers->approx_nxcells_per_ij1 = (int *)malloc(ncells_input*sizeof(int)); + answers->ij2_start = (int *)malloc(ncells_input*sizeof(int)); + answers->ij2_end = (int *)malloc(ncells_input*sizeof(int)); + for( int ilat=0 ; ilatapprox_nxcells_per_ij1[i] = 4; + answers->ij2_start[i] = (ilat*2)*nlon_output_cells + ilon*2; + answers->ij2_end[i] = (2*ilat+1)*nlon_output_cells + ilon*2+1; + i++; + } + } + +} +*/ + +void check_data_on_device( Grid_config *input_grid, Grid_config *output_grid, + int *approx_nxcells_per_ij1, int *ij2_start, int *ij2_end, + Grid_cells_struct_config *output_grid_cells) +{ + + int nlon_input_cells = input_grid->nxc; + int nlat_input_cells = input_grid->nyc; + int nlon_output_cells = output_grid->nxc; + int nlat_output_cells = output_grid->nyc; + + int input_ngridpts = (nlon_input_cells+1)*(nlat_input_cells+1); + int output_ngridpts = (nlon_output_cells+1)*(nlat_output_cells+1); + int input_ncells = nlon_input_cells * nlat_input_cells; + int output_ncells = nlon_output_cells * nlat_output_cells; + + // grid points should be on device. + if(!acc_is_present(input_grid->lonc, input_ngridpts*sizeof(double))) { + printf("INPUT GRID LON COORDINATES NOT ON DEVICE!"); exit(1); + } + if(!acc_is_present(input_grid->latc, input_ngridpts*sizeof(double))) { + printf("INPUT GRID LAT COORDINATES NOT ON DEVICE!"); exit(1); + } + if(!acc_is_present(output_grid->lonc, output_ngridpts*sizeof(double))) { + printf("OUTPUT GRID LON COORDINATES NOT ON DEVICE!"); exit(1); + } + if(!acc_is_present(output_grid->latc, output_ngridpts*sizeof(double))) { + printf("OUTPUT GRID LAT COORDINATES NOT ON DEVICE!"); exit(1); + } + + + //arrays used by upbound_nxgrid should be on device + if(!acc_is_present(approx_nxcells_per_ij1, input_ncells*sizeof(int))) { + printf("APPROX_NXCELLS_PER_IJ1 NOT ON DEVICE!"); exit(1); + } + if(!acc_is_present(ij2_start, input_ncells*sizeof(int))) { + printf("IJ2_START NOT ON DEVICE!"); exit(1); + } + if(!acc_is_present(ij2_end, input_ncells*sizeof(int))) { + printf("IJ2_END NOT ON DEVICE!"); exit(1); + } + + //output_grid_cells information should be on device + if(!acc_is_present(output_grid_cells->lon_min, output_ncells*sizeof(double))) { + printf("OUTPUT_GRID_CELLS LON_MIN NOT ON DEVICE!"); exit(1); + } + if(!acc_is_present(output_grid_cells->lon_max, output_ncells*sizeof(double))) { + printf("OUTPUT_GRID_CELLS LON_MAX NOT ON DEVICE!"); exit(1); + } + if(!acc_is_present(output_grid_cells->lat_min, output_ncells*sizeof(double))) { + printf("OUTPUT_GRID_CELLS LAT_MIN NOT ON DEVICE!"); exit(1); + } + if(!acc_is_present(output_grid_cells->lat_max, output_ncells*sizeof(double))) { + printf("OUTPUT_GRID_CELLS LAT_MAX NOT ON DEVICE!"); exit(1); + } + if(!acc_is_present(output_grid_cells->lon_cent, output_ncells*sizeof(double))) { + printf("OUTPUT_GRID_CELLS LON_CENT NOT ON DEVICE!"); exit(1); + } + if(!acc_is_present(output_grid_cells->area, output_ncells*sizeof(double))) { + printf("OUTPUT_GRID_CELLS AREA NOT ON DEVICE!"); exit(1); + } + if(!acc_is_present(output_grid_cells->nvertices, output_ncells*sizeof(int))) { + printf("OUTPUT_GRID_CELLS NVERTICES NOT ON DEVICE!"); exit(1); + } + + for(int icell=0 ; icelllon_vertices[icell], MAX_V*sizeof(double))) { + printf("OUTPUT_GRID_CELLS LON VERTICES NOT ON DEVICE!"); exit(1); + } + if(!acc_is_present(output_grid_cells->lat_vertices[icell], MAX_V*sizeof(double))) { + printf("OUTPUT_GRID_CELLS LAT VERTICES NOT ON DEVICE!"); exit(1); + } + } + +} + +void check_answers(const Answers *answers, int *approx_nxcells_per_ij1, int *ij2_start, + int *ij2_end, const int upbound_nxcells) +{ + + int ncells_input = answers->ncells_input; + + //double checking answers got copied out + for(int icell=0 ; icellupbound_nxcells), &upbound_nxcells); + + printf("checking approx_nxcells_per_ij1\n"); + check_ianswers(ncells_input, answers->approx_nxcells_per_ij1, approx_nxcells_per_ij1); + + printf("checking ij2_start\n"); + check_ianswers(ncells_input, answers->ij2_start, ij2_start); + + printf("checking ij2_end\n"); + check_ianswers(ncells_input, answers->ij2_end, ij2_end); + +} + +int run_tests(Grid_config *input_grid, Grid_config *output_grid, Grid_cells_struct_config *output_grid_cells, + int **approx_nxcells_per_ij1, int **ij2_start, int **ij2_end, int *upbound_nxcells) +{ + + int nlon_input_cells = input_grid->nxc; + int nlat_input_cells = input_grid->nyc; + int nlon_output_cells = output_grid->nxc; + int nlat_output_cells = output_grid->nyc; + int ncells_input = nlon_input_cells * nlat_input_cells; + int ngridpts_input = (nlon_input_cells+1)*(nlat_input_cells+1); + int ngridpts_output = (nlon_output_cells+1)*(nlat_output_cells+1); + + int jlat_overlap_starts, jlat_overlap_ends; + + int *p_approx_nxcells_per_ij1, *p_ij2_start, *p_ij2_end; + double *input_grid_mask; + + //copy grid to device + copy_grid_to_device_acc(ngridpts_input, input_grid->latc, input_grid->lonc); + copy_grid_to_device_acc(ngridpts_output, output_grid->latc, output_grid->lonc); + + //get mask to skip input cells in creating xgrid + get_input_grid_mask_acc(ncells_input, &input_grid_mask); + if( ! acc_is_present(input_grid_mask, ncells_input*sizeof(double)) ) { + printf("INPUT_GRID_MASK IS NOT ON DEVICE!"); exit(1); + } + + //get output grid cell info + get_grid_cell_struct_acc(nlon_output_cells, nlat_output_cells, output_grid, output_grid_cells); + + //get bounding indices + get_bounding_indices_acc(nlon_output_cells, nlat_output_cells, nlon_input_cells, nlat_input_cells, + output_grid->latc, input_grid->latc, &jlat_overlap_starts, &jlat_overlap_ends); + if(jlat_overlap_starts != 0) printf("SMETHING IS WRONG WITH JLAT_OVERLAP_STARTS %d\n", jlat_overlap_starts); + if(jlat_overlap_ends != nlat_input_cells-1) // this is what was in the original. + printf("SOMETHING IS WRONG WITH JLAT_OVERLAP_ENDS %d %d\n", jlat_overlap_ends, nlat_input_cells-1); + + //malloc and create arrays + create_upbound_nxcells_arrays_on_device_acc(ncells_input, &p_approx_nxcells_per_ij1, &p_ij2_start, &p_ij2_end); + + // check to ensure all data have been transferred/created to device + check_data_on_device(input_grid, output_grid, p_approx_nxcells_per_ij1, p_ij2_start, p_ij2_end, + output_grid_cells); + + *upbound_nxcells = get_upbound_nxcells_2dx2d_acc(input_grid->nxc, input_grid->nyc, output_grid->nxc, output_grid->nyc, + jlat_overlap_starts, jlat_overlap_ends, + input_grid->lonc, input_grid->latc, output_grid->lonc, output_grid->latc, + input_grid_mask, output_grid_cells, + p_approx_nxcells_per_ij1, p_ij2_start, p_ij2_end); + + *approx_nxcells_per_ij1 = p_approx_nxcells_per_ij1; + *ij2_start = p_ij2_start; + *ij2_end = p_ij2_end; + + free_input_grid_mask_acc(ncells_input, &input_grid_mask); + + return 0; + +} + +void cleanup_test(Answers *answers, Grid_config *input_grid, Grid_config *output_grid, + Grid_cells_struct_config *output_grid_cells, + int *approx_nxcells_per_ij1, int *ij2_start, int *ij2_end) +{ + + int ncells_output = output_grid->nxc * output_grid->nyc; + int ncells_input = input_grid->nxc * input_grid->nyc; + + free_grid_cell_struct_acc(ncells_output, output_grid_cells); + free_upbound_nxcells_arrays_acc(ncells_input, &approx_nxcells_per_ij1, &ij2_start, &ij2_end); + +} + +void check_ianswers( const int n, const int *answers, const int *checkme) +{ + + for(int i=0 ; i. +#*********************************************************************** + +check_PROGRAMS = test_read_remap_file + +AM_CFLAGS = $(NETCDF_CFLAGS) \ + -I$(top_srcdir)/tools/libfrencutils \ + -I$(top_srcdir)/tools/libfrencutils_acc \ + -I$(top_srcdir)/tools/fregrid_acc -acc + +LDADD = $(NETCDF_LDFLAGS) $(NETCDF_LIBS) $(RPATH_FLAGS) \ + $(top_builddir)/tools/fregrid_acc/conserve_interp_acc.o \ + $(top_builddir)/tools/fregrid_acc/interp_utils_acc.o \ + $(top_builddir)/tools/libfrencutils/libfrencutils.a \ + $(top_builddir)/tools/libfrencutils_acc/libfrencutils_acc.a + +test_read_remap_file_SOURCES = test_read_remap_file.c + +TESTS = test_read_remap_file_conserve.sh + +EXTRA_DIST = test_read_remap_file_conserve.sh \ + test_make_remap_file.py + + +TESTS_ENVIRONMENT = test_make_remap_file_conserve="$(srcdir)/test_make_remap_file_conserve.py" + +TEST_EXTENSIONS = .sh + +CLEANFILES = *.nc *txt diff --git a/t_acc/test_read_remap_file/test_make_remap_file_conserve.py b/t_acc/test_read_remap_file/test_make_remap_file_conserve.py new file mode 100755 index 00000000..9a948648 --- /dev/null +++ b/t_acc/test_read_remap_file/test_make_remap_file_conserve.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python3 + +#*********************************************************************** +# GNU Lesser General Public License +# +# This file is part of the GFDL FRE NetCDF tools package (FRE-NCTools). +# +# FRE-NCtools is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or (at +# your option) any loner version. +# +# FRE-NCtools is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with FRE-NCTools. If not, see +# . +# ********************************************************************** + +# This python script generates made-up remap files for first order +# and second order interpolation scheme where the number +# output grid tile2 = 2. In addition, this script generate answer +# files in ASCII format. + +import xarray as xr +import numpy as np +import random +import sys + +output_grid_ntiles = 2; + +input_grid_ncells_array = [random.randint(100,500), random.randint(100,500)] +output_grid_ncells_array = input_grid_ncells_array + +nxcells_per_output_tile = [ input_grid_ncells_array[i] * output_grid_ncells_array[i] + for i in range(0,output_grid_ntiles) ] +nxcells2_per_output_tile = [ 2*nxcells_per_output_tile[i] for i in range(0,output_grid_ntiles) ] + +interp_method = int(sys.argv[1]) #1 or 2 + +def write_answers(myfile, answer) : + myfile.write(" ".join(answer)) + myfile.write("\n") + + +for otile in (1,output_grid_ntiles) : + + input_grid_ncells, output_grid_ncells = input_grid_ncells_array[otile-1], output_grid_ncells_array[otile-1] + nxcells, nxcells2 = nxcells_per_output_tile[otile-1], nxcells2_per_output_tile[otile-1] + + #tile1 + tile1_data = [ random.randint(1,6) for i in range(1,nxcells+1) ] + tile1_data_attrs = dict(description="made up numbers", + standard_name="tile_number_in_mosaic1") + tile1 = xr.DataArray( data = tile1_data, dims=('ncells'), attrs=tile1_data_attrs ) + + + #tile1_cell + i_in = [ random.randint(1,input_grid_ncells) for i in range(nxcells) ] + j_in = [ random.randint(1,input_grid_ncells) for i in range(nxcells) ] + tile1_cell_data = np.array( [i_in,j_in] ).flatten('F') + tile1_cell_attrs=dict(description="made up numbers", + standard_name="parent_cell_indices_in_mosaic1") + tile1_cell = xr.DataArray( data=tile1_cell_data, dims=('ncells2'), attrs=tile1_cell_attrs ) + + + #tile2_cell + i_out = [ random.randint(1,output_grid_ncells) for i in range(nxcells) ] + j_out = [ random.randint(1,output_grid_ncells) for i in range(nxcells) ] + tile2_cell_data = np.array( [i_out,j_out] ).flatten('F') + tile2_cell_attrs=dict(description="made up numbers", + standard_name="parent_cell_indices_in_mosaic1") + tile2_cell = xr.DataArray( data=tile2_cell_data, + dims=('ncells2'), + attrs=tile2_cell_attrs ) + + + #xgrid_area + xgrid_area_data = np.array([ random.randint(1,10000)+0.1 for i in range(nxcells) ], dtype=np.float64) + xgrid_area_attrs = dict(description="made up numbers", + standard_name="exchange_grid_area", + units="m2" ) + xgrid_area = xr.DataArray( data=xgrid_area_data, dims=('ncells'), attrs=xgrid_area_attrs ) + + + if( interp_method == 2 ) : + #tile1_distance + di_in = np.array([ random.randint(1,10000)+0.2 for i in range(nxcells) ], dtype=np.float64) + dj_in = np.array([ random.randint(1,10000)+0.3 for i in range(nxcells) ], dtype=np.float64) + tile1_distance_data = np.array( [di_in, dj_in] ).flatten('F') + tile1_distance_attrs=dict(description="Made up numbers", + standard_name="distance_from_parent1_cell_centroid") + tile1_distance = xr.DataArray( data=tile1_distance_data, dims=('ncells2'), attrs=tile1_distance_attrs ) + + + #dataset conserve_order1 + data_vars = {"tile1": tile1, "tile1_cell":tile1_cell, "tile2_cell":tile2_cell, "xgrid_area":xgrid_area} + if( interp_method == 2 ) : data_vars["tile1_distance"] = tile1_distance + remap = xr.Dataset(data_vars=data_vars) + #write netcdf file + remap.to_netcdf(f"remap_conserve{interp_method}.tile{otile}.nc", format="NETCDF4_CLASSIC") + + #write out answer file + myfile = open(f"answers_conserve{interp_method}.tile{otile}.txt", 'w') + + #answer for nxcells + write_answers(myfile,[str(nxcells)]) #total_nxcells + write_answers(myfile, [str(tile1_data.count(i)) for i in range(1,7) ]) #nxcells per gridin tile + + #answer for interp[n].per_intile[m].i_in + # -1 to be consistent with zhi.... + for itile in range(1,7) : + answer = [ str(i_in[i]-1) for i in range(nxcells) if tile1_data[i]==itile ] + write_answers(myfile, answer) + + #answer for interp[n].per_intile[m].j_in + # -1 to be consistent with zhi.... + for itile in range(1,7) : + answer = [ str(j_in[i]-1) for i in range(nxcells) if tile1_data[i]==itile ] + write_answers(myfile, answer) + + #answer for interp[n].per_intile[m].i_out + # -1 to be consistent with zhi.... + for itile in range(1,7) : + answer = [ str(i_out[i]-1) for i in range(nxcells) if tile1_data[i]==itile ] + write_answers(myfile, answer) + + #answer for interp[n].per_intile[m].j_out + # -1 to be consistent with zhi.... + for itile in range(1,7) : + answer = [ str(j_out[i]-1) for i in range(nxcells) if tile1_data[i]==itile ] + write_answers(myfile, answer) + + #answer for interp[n].per_intile[m].area + for itile in range(1,7) : + answer = [ str(xgrid_area_data[i]) for i in range(nxcells) if tile1_data[i]==itile ] + write_answers(myfile, answer) + + if( interp_method == 2 ) : + #answer for interp[n].per_intile[m].di_in + for itile in range(1,7) : + answer = [ str(di_in[i]) for i in range(nxcells) if tile1_data[i]==itile ] + write_answers(myfile, answer) + #answer for interp[n].per_intile[m].di_in + for itile in range(1,7) : + answer = [ str(dj_in[i]) for i in range(nxcells) if tile1_data[i]==itile ] + write_answers(myfile, answer) + + myfile.close() diff --git a/t_acc/test_read_remap_file/test_read_remap_file.c b/t_acc/test_read_remap_file/test_read_remap_file.c new file mode 100644 index 00000000..fc8a5969 --- /dev/null +++ b/t_acc/test_read_remap_file/test_read_remap_file.c @@ -0,0 +1,473 @@ +/*********************************************************************** + * GNU Lesser General Public License + * + * This file is part of the GFDL FRE NetCDF tools package (FRE-NCTools). + * + * FRE-NCtools is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or (at + * your option) any loner version. + * + * FRE-NCtools is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FRE-NCTools. If not, see + * . + **********************************************************************/ + +// This test tests function read_remap_file_acc to read in a made-up +// remap file. It ensures the correct initialization of the interp_acc struct. +// The remap file is generated with the python script +// test_make_remap_file_conserve.py that uses the xarray module. This +// test also tests the function copy_interp_to_device_acc which copies interp_acc +// to device. + +#include +#include +#include +#include +#include +#include +#include +#include "conserve_interp_acc.h" +#include "interp_utils_acc.h" +#include "globals_acc.h" + +#define INPUT_GRID_NTILES 6 +#define OUTPUT_GRID_NTILES 2 + +double const tolerance = 1.e-7; + +int const input_grid_nlon = 2; +int const output_grid_nlon = 3; + +typedef struct { + char answer_file[15]; + size_t total_nxcells; + int nxcells[INPUT_GRID_NTILES]; + int *input_parent_cell_index[INPUT_GRID_NTILES]; + int *output_parent_cell_index[INPUT_GRID_NTILES]; + double *xcell_area[INPUT_GRID_NTILES]; + double *dcentroid_lon[INPUT_GRID_NTILES]; + double *dcentroid_lat[INPUT_GRID_NTILES]; +} Answers; + +typedef enum { + myCONSERVE_ORDER1, + myCONSERVE_ORDER2 +} myInterp_Method; + +typedef enum { + ON_DEVICE, + ON_HOST +} Where_Am_I; + +char answer_files1[OUTPUT_GRID_NTILES][30] = { "answers_conserve1.tile1.txt", "answers_conserve1.tile2.txt" }; +char answer_files2[OUTPUT_GRID_NTILES][30] = { "answers_conserve2.tile1.txt", "answers_conserve2.tile2.txt" }; + +char remap_files1[OUTPUT_GRID_NTILES][30] = { "remap_conserve1.tile1.nc", "remap_conserve1.tile2.nc" }; +char remap_files2[OUTPUT_GRID_NTILES][30] = { "remap_conserve2.tile1.nc", "remap_conserve2.tile2.nc" }; + +void read_all_answers(Answers *answers, int myinterp_method); +void read_ianswers( FILE *myfile, int *nxcells, int **ianswer); +void read_ranswers( FILE *myfile, int *nxcells, double **ianswer); +void check_answers_on_device(Answers *answers, Interp_config_acc *interp_acc, int myinterp_method); +void check_answers_on_host(Answers *answers, Interp_config_acc *interp_acc, int myinterp_method); +void reset_interp_acc_on_host( Interp_config_acc *interp_acc, Answers *answers, int myinterp_method ); +void check_ianswers(int n, int *answers, int *checkme, int host_or_device); +void check_ranswers(int n, double *answers, double *checkme, int host_or_device); +void error(char *error_message); + +// start program +int main(int argc, char *argv[]) { + + Interp_config_acc interp_acc[OUTPUT_GRID_NTILES]; + Grid_config input_grid[INPUT_GRID_NTILES], output_grid[OUTPUT_GRID_NTILES]; + Answers answers[OUTPUT_GRID_NTILES]; + + unsigned int opcode; + myInterp_Method myinterp_method; + + if( atoi(argv[1]) == 1) { + opcode = CONSERVE_ORDER1; + myinterp_method = myCONSERVE_ORDER1; + } + else if( atoi(argv[1]) == 2 ) { + opcode = CONSERVE_ORDER2; + myinterp_method = myCONSERVE_ORDER2; + } + + // assign number of cells in lon direction to Grid_config + for(int otile=0 ; otile 0 ) error("ERROR with data on device"); + + } + +} +//------------------------------------------ +//------------------------------------------ +void check_ranswers( int n, double *answers, double *checkme, int host_or_device) +{ + + if( host_or_device == ON_HOST ) { + for(int i=0 ; i tolerance ) { + printf("ERROR element %d: %lf vs %lf\n", i, answers[i], checkme[i]); + exit(1); + } + } + } + else { + + int theres_an_error=-99; + + if( !acc_is_present(answers, n*sizeof(int)) ) error("answers are not on device"); + if( !acc_is_present(checkme, n*sizeof(int)) ) error("answers are not on device"); + +#pragma acc data present(answers[:n], checkme[:n]) copy(theres_an_error) +#pragma acc parallel loop independent reduction(+:theres_an_error) + for(int i=0 ; i tolerance ) { + printf("ERROR element %d: %lf vs %lf\n", i, answers[i], checkme[i]); + theres_an_error++; + } + } + if(theres_an_error != -99) error("ERROR with data on device"); + } + +} +//------------------------------------------ +//------------------------------------------ +void error( char *error_message ) +{ + printf("%s\n", error_message); + exit(1); +} diff --git a/t_acc/test_read_remap_file/test_read_remap_file_conserve.sh b/t_acc/test_read_remap_file/test_read_remap_file_conserve.sh new file mode 100755 index 00000000..e9ee82d2 --- /dev/null +++ b/t_acc/test_read_remap_file/test_read_remap_file_conserve.sh @@ -0,0 +1,31 @@ +#!/bin/bash +#*********************************************************************** +#* GNU Lesser General Public License +#* +#* This file is part of the GFDL Flexible Modeling System (FMS). +#* +#* FMS is free software: you can redistribute it and/or modify it under +#* the terms of the GNU Lesser General Public License as published by +#* the Free Software Foundation, either version 3 of the License, or (at +#* your option) any later version. +#* +#* FMS is distributed in the hope that it will be useful, but WITHOUT +#* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +#* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +#* for more details. +#* +#* You should have received a copy of the GNU Lesser General Public +#* License along with FMS. If not, see . +#*********************************************************************** + +#copy python script over +#make cp_test_make_remap_file + +echo -e "\ntest reading remap file for conserve_order1\n" + +${test_make_remap_file_conserve} 1 +./test_read_remap_file 1 + +echo -e "\ntest reading remap file for conserve_order2\n" +${test_make_remap_file_conserve} 2 +./test_read_remap_file 2 diff --git a/tools/fregrid_acc/Makefile.am b/tools/fregrid_acc/Makefile.am index fdbc7101..81feab62 100644 --- a/tools/fregrid_acc/Makefile.am +++ b/tools/fregrid_acc/Makefile.am @@ -26,6 +26,10 @@ LDADD = $(NETCDF_LDFLAGS) $(NETCDF_LIBS) $(RPATH_FLAGS) fregrid_acc_SOURCES = conserve_interp_acc.c \ conserve_interp_acc.h \ + interp_utils_acc.c \ + interp_utils_acc.h \ + fregrid_utils_acc.c \ + fregrid_utils_acc.h \ fregrid_acc.c fregrid_acc_LDADD = $(top_builddir)/tools/fregrid/fregrid_util.o \ diff --git a/tools/fregrid_acc/conserve_interp_acc.c b/tools/fregrid_acc/conserve_interp_acc.c index f37acfbe..6e3d96aa 100644 --- a/tools/fregrid_acc/conserve_interp_acc.c +++ b/tools/fregrid_acc/conserve_interp_acc.c @@ -22,882 +22,604 @@ #include #include #include -#include "constant.h" -#include "globals.h" -#include "create_xgrid.h" -#include "mosaic_util.h" -#include "conserve_interp.h" +#include +#include "globals_acc.h" #include "conserve_interp_acc.h" -#include "fregrid_util.h" +#include "interp_utils_acc.h" +#include "create_xgrid_acc.h" +#include "create_xgrid_utils_acc.h" +#include "general_utils_acc.h" #include "mpp.h" #include "mpp_io.h" #include "read_mosaic.h" -#define AREA_RATIO (1.e-3) -#define MAXVAL (1.e20) -#define TOLERANCE (1.e-10) /******************************************************************************* void setup_conserve_interp Setup the interpolation weight for conservative interpolation *******************************************************************************/ -void setup_conserve_interp_acc(int ntiles_in, const Grid_config *grid_in, int ntiles_out, - Grid_config *grid_out, Interp_config *interp, unsigned int opcode) +void setup_conserve_interp_acc(int ntiles_input_grid, Grid_config *input_grid, int ntiles_output_grid, + Grid_config *output_grid, Interp_config_acc *interp_acc, unsigned int opcode) { - int n, m, i, ii, jj, nx_in, ny_in, nx_out, ny_out, tile; - size_t nxgrid, nxgrid2, nxgrid_prev; - int *i_in=NULL, *j_in=NULL, *i_out=NULL, *j_out=NULL; - int *tmp_t_in=NULL, *tmp_i_in=NULL, *tmp_j_in=NULL, *tmp_i_out=NULL, *tmp_j_out=NULL; - double *tmp_di_in, *tmp_dj_in; - double *xgrid_area=NULL, *tmp_area=NULL, *xgrid_clon=NULL, *xgrid_clat=NULL; - - double garea; - typedef struct{ - double *area; - double *clon; - double *clat; - } CellStruct; - CellStruct *cell_in; - - garea = 4*M_PI*RADIUS*RADIUS; if( opcode & READ) { - for(n=0; n= grid_out[n].isc && - j_out[i] <= grid_out[n].jec && j_out[i] >= grid_out[n].jsc ) - ind[interp[n].nxgrid++] = i; + read_remap_file_acc(ntiles_input_grid, ntiles_output_grid, output_grid, input_grid, interp_acc, opcode); + copy_interp_to_device_acc(ntiles_input_grid, ntiles_output_grid, interp_acc, opcode); + return; + } + + for(int otile=0; otilenxcells; + int nlon_input_cells, ii; + + size_t start[4] = {0,0,0,0}, nwrite[4] = {1, 1, 1, 1}; + int *data_int=NULL; + double *data_double=NULL; + + int fid = mpp_open( interp_acc[otile].remap_file, MPP_WRITE); + int dim_string = mpp_def_dim(fid, "string", STRING); + int dim_ncells = mpp_def_dim(fid, "ncells", nxcells); + int dim_two = mpp_def_dim(fid, "two", 2); + int dims[4] = {dim_ncells, dim_two, 0, 0}; + + int id_tile1 = mpp_def_var(fid, "tile1", NC_INT, 1, &dim_ncells, 1, + "standard_name", "tile_number_in_mosaic1"); + int id_tile1_cell = mpp_def_var(fid, "tile1_cell", NC_INT, 2, dims, 1, + "standard_name", "parent_cell_indices_in_mosaic1"); + int id_tile2_cell = mpp_def_var(fid, "tile2_cell", NC_INT, 2, dims, 1, + "standard_name", "parent_cell_indices_in_mosaic2"); + int id_xgrid_area = mpp_def_var(fid, "xgrid_area", NC_DOUBLE, 1, &dim_ncells, 2, + "standard_name", "exchange_grid_area", "units", "m2"); + int id_tile1_dist = (opcode & CONSERVE_ORDER2) ? + mpp_def_var(fid, "tile1_distance", NC_DOUBLE, 2, dims, 1, + "standard_name", "distance_from_parent1_cell_centroid") : 0 ; + + nwrite[0] = nxcells; + mpp_end_def(fid); + + data_int = (int *)malloc(nxcells*sizeof(int)); + data_double = (double *)malloc(nxcells*sizeof(double)); + + //update data on host + for(int itile=0 ; itileinput_tile+itile; + int itile_nxcells = p_interp_for_itile->nxcells; +#pragma acc update host( p_interp_for_itile->input_parent_cell_index[:itile_nxcells], \ + p_interp_for_itile->output_parent_cell_index[:itile_nxcells], \ + p_interp_for_itile->xcell_area[:itile_nxcells]) +#pragma acc update if(opcode &CONSERVE_ORDER2) host(p_interp_for_itile->dcentroid_lon[:itile_nxcells], \ + p_interp_for_itile->dcentroid_lat[:itile_nxcells]) } - for(n=0; n y_min ) { - if(j < jstart ) jstart = j; - } - if( yy < y_max ) { - if(j > jend ) jend = j; - } - - } - jstart = max(0, jstart-1); - jend = min(ny_in-1, jend+1); - ny_now = jend-jstart+1; - - if(opcode & CONSERVE_ORDER1) { - nxgrid = create_xgrid_2dx2d_order1(&nx_in, &ny_now, &nx_out, &ny_out, grid_in[m].lonc+jstart*(nx_in+1), - grid_in[m].latc+jstart*(nx_in+1), grid_out[n].lonc, grid_out[n].latc, - mask, i_in, j_in, i_out, j_out, xgrid_area); - for(i=0; i 0) { - g_i_in = (int *)malloc(g_nxgrid*sizeof(int )); - g_j_in = (int *)malloc(g_nxgrid*sizeof(int )); - g_area = (double *)malloc(g_nxgrid*sizeof(double)); - g_clon = (double *)malloc(g_nxgrid*sizeof(double)); - g_clat = (double *)malloc(g_nxgrid*sizeof(double)); - mpp_gather_field_int (nxgrid, i_in, g_i_in); - mpp_gather_field_int (nxgrid, j_in, g_j_in); - mpp_gather_field_double(nxgrid, xgrid_area, g_area); - mpp_gather_field_double(nxgrid, xgrid_clon, g_clon); - mpp_gather_field_double(nxgrid, xgrid_clat, g_clat); - for(i=0; i 0) { - nxgrid_prev = interp[n].nxgrid; - interp[n].nxgrid += nxgrid; - if(nxgrid_prev == 0 ) { - interp[n].i_in = (int *)malloc(interp[n].nxgrid*sizeof(int )); - interp[n].j_in = (int *)malloc(interp[n].nxgrid*sizeof(int )); - interp[n].i_out = (int *)malloc(interp[n].nxgrid*sizeof(int )); - interp[n].j_out = (int *)malloc(interp[n].nxgrid*sizeof(int )); - interp[n].area = (double *)malloc(interp[n].nxgrid*sizeof(double)); - interp[n].t_in = (int *)malloc(interp[n].nxgrid*sizeof(int )); - for(i=0; i0) */ + //input tile + ii = 0; + for( int itile=0 ; itileinput_tile+itile; + int itile_nxcells = p_interp_for_itile->nxcells; + for( int i=0 ; i 0) { - if( fabs(cell_in[n].area[ii]-grid_in[n].cell_area[ii])/grid_in[n].cell_area[ii] < AREA_RATIO ) { - cell_in[n].clon[ii] /= cell_in[n].area[ii]; - cell_in[n].clat[ii] /= cell_in[n].area[ii]; - } - else { - n0 = j*(nx_in+1)+i; n1 = j*(nx_in+1)+i+1; - n2 = (j+1)*(nx_in+1)+i+1; n3 = (j+1)*(nx_in+1)+i; - x1_in[0] = grid_in[n].lonc[n0]; y1_in[0] = grid_in[n].latc[n0]; - x1_in[1] = grid_in[n].lonc[n1]; y1_in[1] = grid_in[n].latc[n1]; - x1_in[2] = grid_in[n].lonc[n2]; y1_in[2] = grid_in[n].latc[n2]; - x1_in[3] = grid_in[n].lonc[n3]; y1_in[3] = grid_in[n].latc[n3]; - n1_in = fix_lon(x1_in, y1_in, 4, M_PI); - lon_in_avg = avgval_double(n1_in, x1_in); - clon = poly_ctrlon(x1_in, y1_in, n1_in, lon_in_avg); - clat = poly_ctrlat (x1_in, y1_in, n1_in ); - cell_in[n].clon[ii] = clon/grid_in[n].cell_area[ii]; - cell_in[n].clat[ii] = clat/grid_in[n].cell_area[ii]; - } - } - } + mpp_put_var_value(fid, id_tile1, data_int); + + // i (x, lon) indices of input parent + ii=0; + for( int itile=0 ; itileinput_tile+itile; + int itile_nxcells = p_interp_for_itile->nxcells; + nlon_input_cells = input_grid[itile].nxc; + for( int i=0 ; iinput_parent_cell_index[i]%nlon_input_cells+1; + ii++; } - for(n=0; ninput_tile+itile; + int itile_nxcells = p_interp_for_itile->nxcells; + for( int i=0 ; ioutput_parent_cell_index[i]%nlon_input_cells+1; + ii++; } - - /* free the memory */ - for(n=0; ninput_tile+itile; + int itile_nxcells = p_interp_for_itile->nxcells; + nlon_input_cells = input_grid[itile].nxc; + for( int i=0 ; iinput_parent_cell_index[i]/nlon_input_cells+1; + ii++; } - free(cell_in); } - if( opcode & WRITE) { /* write out remapping information */ - for(n=0; n 0) { - size_t start[4], nwrite[4]; - int fid, dim_string, dim_ncells, dim_two, dims[4]; - int id_xgrid_area, id_tile1_dist; - int id_tile1_cell, id_tile2_cell, id_tile1; - int *gdata_int, *ldata_int; - double *gdata_dbl; - - fid = mpp_open( interp[n].remap_file, MPP_WRITE); - dim_string = mpp_def_dim(fid, "string", STRING); - dim_ncells = mpp_def_dim(fid, "ncells", nxgrid); - dim_two = mpp_def_dim(fid, "two", 2); - dims[0] = dim_ncells; dims[1] = dim_two; - id_tile1 = mpp_def_var(fid, "tile1", NC_INT, 1, &dim_ncells, 1, - "standard_name", "tile_number_in_mosaic1"); - id_tile1_cell = mpp_def_var(fid, "tile1_cell", NC_INT, 2, dims, 1, - "standard_name", "parent_cell_indices_in_mosaic1"); - id_tile2_cell = mpp_def_var(fid, "tile2_cell", NC_INT, 2, dims, 1, - "standard_name", "parent_cell_indices_in_mosaic2"); - id_xgrid_area = mpp_def_var(fid, "xgrid_area", NC_DOUBLE, 1, &dim_ncells, 2, - "standard_name", "exchange_grid_area", "units", "m2"); - if(opcode & CONSERVE_ORDER2) id_tile1_dist = mpp_def_var(fid, "tile1_distance", NC_DOUBLE, 2, dims, 1, - "standard_name", "distance_from_parent1_cell_centroid"); - mpp_end_def(fid); - for(i=0; i<4; i++) { - start[i] = 0; nwrite[i] = 1; - } - nwrite[0] = nxgrid; - gdata_int = (int *)malloc(nxgrid*sizeof(int)); - if(interp[n].nxgrid>0) ldata_int = (int *)malloc(interp[n].nxgrid*sizeof(int)); - mpp_gather_field_int(interp[n].nxgrid, interp[n].t_in, gdata_int); - for(i=0; i0)free(ldata_int); - - gdata_dbl = (double *)malloc(nxgrid*sizeof(double)); - mpp_gather_field_double(interp[n].nxgrid, interp[n].area, gdata_dbl); - mpp_put_var_value(fid, id_xgrid_area, gdata_dbl); - - if(opcode & CONSERVE_ORDER2) { - start[1] = 0; - mpp_gather_field_double(interp[n].nxgrid, interp[n].di_in, gdata_dbl); - mpp_put_var_value_block(fid, id_tile1_dist, start, nwrite, gdata_dbl); - start[1] = 1; - mpp_gather_field_double(interp[n].nxgrid, interp[n].dj_in, gdata_dbl); - mpp_put_var_value_block(fid, id_tile1_dist, start, nwrite, gdata_dbl); - } + mpp_put_var_value_block(fid, id_tile1_cell, start, nwrite, data_int); + + // j (y, lat) indices of output parent + ii=0; + nlon_input_cells = output_grid[otile].nxc; + for( int itile=0 ; itileinput_tile+itile; + int itile_nxcells = p_interp_for_itile->nxcells; + for( int i=0 ; ioutput_parent_cell_index[i]/nlon_input_cells+1; + ii++; + } + } + mpp_put_var_value_block(fid, id_tile2_cell, start, nwrite, data_int); + + // exchange cell area + ii=0; + for( int itile=0 ; itileinput_tile+itile; + int itile_nxcells = p_interp_for_itile->nxcells; + for( int i=0 ; ixcell_area[i]; + ii++; + } + } + mpp_put_var_value(fid, id_xgrid_area, data_double); - free(gdata_dbl); - mpp_close(fid); + if(opcode & CONSERVE_ORDER2) { + ii=0; start[1] = 0 ; + for( int itile=0 ; itileinput_tile+itile; + int itile_nxcells = p_interp_for_itile->nxcells; + for( int i=0 ; idcentroid_lon[i]; + ii++; + } + } + mpp_put_var_value_block(fid, id_tile1_dist, start, nwrite, data_double); + + ii=0; start[1] = 1 ; + for( int itile=0 ; itileinput_tile+itile; + int itile_nxcells = p_interp_for_itile->nxcells; + for( int i=0 ; idcentroid_lat[i]; + ii++; } } + mpp_put_var_value_block(fid, id_tile1_dist, start, nwrite, data_double); } - if(mpp_pe() == mpp_root_pe())printf("NOTE: done calculating index and weight for conservative interpolation\n"); - } - /* check the input area match exchange grid area */ - if(opcode & CHECK_CONSERVE) { - int nx1, ny1, max_i, max_j, i, j; - double max_ratio, ratio_change; - double *area2; + free(data_int) ; data_int = NULL; + free(data_double); data_double=NULL; - /* sum over exchange grid to get the area of grid_in */ - nx1 = grid_out[0].nxc; - ny1 = grid_out[0].nyc; + mpp_close(fid); - area2 = (double *)malloc(nx1*ny1*sizeof(double)); + }//ntiles_out - for(n=0; n max_ratio) { - max_ratio = ratio_change; - max_i = i; - max_j = j; - } - if( ratio_change > 1.e-4 ) { - printf("(i,j)=(%d,%d), change = %g, area1=%g, area2=%g\n", i, j, ratio_change, grid_out[n].cell_area[ii],area2[ii]); - } - } - ii = max_j*nx1+max_i; - printf("The maximum ratio change at (%d,%d) = %g, area1=%g, area2=%g\n", max_i, max_j, max_ratio, grid_out[n].cell_area[ii],area2[ii]); +} - } +void check_area_conservation(const int ntiles_output_grid, const int ntiles_input_grid, Grid_config *output_grid, + Interp_config_acc *interp_acc) +{ - free(area2); + for(int otile=0; otilenxcells; + for(int i=0; ioutput_parent_cell_index[i]; + recomputed_output_area[ii] += m_interp->xcell_area[i]; + } + } + + /* compare actual area and recomputed_output_area */ + for(int ij=0 ; ij max_ratio) { + max_ratio = ratio_change; + max_area = recomputed_output_area[ij]; + max_ij = ij; + } + if( ratio_change > 1.e-4 ) { + printf("(i,j)=(%d,%d), change = %g, area1=%g, recomputed_output_area=%g\n", + ij%nlon_output_cells, ij/nlon_output_cells, ratio_change, output_grid[otile].cell_area[ij],recomputed_output_area[ij]); + } + } + printf("The maximum ratio change at (%d,%d) = %g, area1=%g, recomputed_output_area=%g\n", + max_ij%nlon_output_cells, max_ij/nlon_output_cells, max_ratio, output_grid[otile].cell_area[max_ij], max_area); + + free(recomputed_output_area); recomputed_output_area = NULL; + }//for each output tile + +} /******************************************************************************* void do_scalar_conserve_interp( ) doing conservative interpolation *******************************************************************************/ -void do_scalar_conserve_interp_acc(Interp_config *interp, int varid, int ntiles_in, const Grid_config *grid_in, - int ntiles_out, const Grid_config *grid_out, const Field_config *field_in, - Field_config *field_out, unsigned int opcode, int nz) +void do_scalar_conserve_interp_acc(Interp_config_acc *interp_acc, int varid, int ntiles_input_grid, const Grid_config *input_grid, + int ntiles_output_grid, const Grid_config *output_grid, const Field_config *field_in, + Field_config *field_out, unsigned int opcode) { - int nx1, ny1, nx2, ny2, i1, j1, i2, j2, tile, n, m, i, j, n1, n2; - int k, n0; - int has_missing, halo, interp_method; - int weight_exist; - int cell_measures, cell_methods; - double area, missing, di, dj, area_missing; - double *out_area; - int *out_miss; - double gsum_out; - int monotonic; - int target_grid; - Monotone_config *monotone_data; - - gsum_out = 0; - interp_method = field_in->var[varid].interp_method; - halo = 0; - monotonic = 0; - if(interp_method == CONSERVE_ORDER2) { - halo = 1; - monotonic = opcode & MONOTONIC; - } - - area_missing = field_in->var[varid].area_missing; - has_missing = field_in->var[varid].has_missing; - weight_exist = grid_in[0].weight_exist; - cell_measures = field_in->var[varid].cell_measures; - cell_methods = field_in->var[varid].cell_methods; - target_grid = opcode & TARGET; - if( field_in->var[varid].use_volume ) target_grid = 0; - - missing = -MAXVAL; - if(has_missing) missing = field_in->var[varid].missing; - - if( nz>1 && has_missing ) mpp_error("conserve_interp: has_missing should be false when nz > 1"); - if( nz>1 && cell_measures ) mpp_error("conserve_interp: cell_measures should be false when nz > 1"); - if( nz>1 && cell_methods == CELL_METHODS_SUM ) mpp_error("conserve_interp: cell_methods should not be sum when nz > 1"); - /* if( nz>1 && monotonic ) mpp_error("conserve_interp: monotonic should be false when nz > 1"); */ - - if(monotonic) monotone_data = (Monotone_config *)malloc(ntiles_in*sizeof(Monotone_config)); - - for(m=0; mvar[varid].cell_measures; + int cell_methods = field_in->var[varid].cell_methods; + int target_grid = ( field_in->var[varid].use_volume ) ? 0 : (opcode & TARGET); + int has_missing = field_in->var[varid].has_missing; + double missing = (has_missing) ? field_in->var[varid].missing : -MAXVAL; + double gsum_out=0.0; + + for(int otile=0; otilevar[varid].name,tile,i1,j1,i2,j2); - mpp_error("conserve_interp: data is not missing but area is missing"); - } - area *= (field_in[tile].area[n1]/grid_in[tile].cell_area[n1]); - } - field_out[m].data[n0] += (field_in[tile].data[n1]*area); - out_area[n0] += area; - out_miss[n0] = 1; - } - } - } - else { - for(n=0; n monotone_data[n].f_bar_max[n1] ) monotone_data[n].f_bar_max[n1] = field_in[n].data[n2]; - if( field_in[n].data[n2] < monotone_data[n].f_bar_min[n1] ) monotone_data[n].f_bar_min[n1] = field_in[n].data[n2]; - } - } - } - } - xdata = (double *)malloc(interp[m].nxgrid*sizeof(double)); - for(n=0; n monotone_data[tile].f_max[n1]) monotone_data[tile].f_max[n1] = xdata[n]; - if( xdata[n] < monotone_data[tile].f_min[n1]) monotone_data[tile].f_min[n1] = xdata[n]; - } - } - else - xdata[n] = missing; - } + for(int itile=0 ; itile1) { - for(n=0; n monotone_data[tile].f_bar_max[n1] ) { - /* z1l: Due to truncation error, we might get xdata[n] > f_bar_max[n1]. So - we allow some tolerance. What is the suitable tolerance? */ - xdata[n] = f_bar + ((xdata[n]-f_bar)/(monotone_data[tile].f_max[n1]-f_bar)) - * (monotone_data[tile].f_bar_max[n1]-f_bar); - if( xdata[n] > monotone_data[tile].f_bar_max[n1]) { - if(xdata[n] - monotone_data[tile].f_bar_max[n1] < TOLERANCE ) xdata[n] = monotone_data[tile].f_bar_max[n1]; - if( xdata[n] > monotone_data[tile].f_bar_max[n1]) { - printf(" n = %d, n1 = %d, xdata = %f, f_bar_max=%f\n", n, n1, xdata[n], monotone_data[tile].f_bar_max[n1]); - mpp_error(" xdata is greater than f_bar_max "); - } - } - } - else if( monotone_data[tile].f_min[n1] < monotone_data[tile].f_bar_min[n1] ) { - /* z1l: Due to truncation error, we might get xdata[n] < f_bar_min[n1]. So - we allow some tolerance. What is the suitable tolerance? */ - xdata[n] = f_bar + ((xdata[n]-f_bar)/(monotone_data[tile].f_min[n1]-f_bar)) * (monotone_data[tile].f_bar_min[n1]-f_bar); - if( xdata[n] < monotone_data[tile].f_bar_min[n1]) { - if(monotone_data[tile].f_bar_min[n1] - xdata[n]< TOLERANCE ) xdata[n] = monotone_data[tile].f_bar_min[n1]; - if( xdata[n] < monotone_data[tile].f_bar_min[n1]) { - printf(" n = %d, n1 = %d, xdata = %f, f_bar_min=%f\n", n, n1, xdata[n], monotone_data[tile].f_bar_min[n1]); - mpp_error(" xdata is less than f_bar_min "); - } - } - } - } - for(n=0; nvar[varid].name,tile,i1,j1,i2,j2); - mpp_error("conserve_interp: data is not missing but area is missing"); - } - area *= (field_in[tile].area[n1]/grid_in[tile].cell_area[n1]); - } - if(field_in[tile].grad_mask[n1]) { /* use zero gradient */ - field_out[m].data[n0] += field_in[tile].data[n2]*area; - } - else { - field_out[m].data[n0] += (field_in[tile].data[n2]+field_in[tile].grad_x[n1]*di - +field_in[tile].grad_y[n1]*dj)*area; - } - out_area[n0] += area; - out_miss[n0] = 1; - } - } - } - else { - for(n=0; n 0) gsum_out += field_out[m].data[i]; +#pragma acc enter data copyin(gsum_out) +#pragma acc parallel loop present(out_area[:ncells_output_grid],\ + p_fieldout_data[:ncells_output_grid])\ + reduction(+:gsum_out) + for(int i=0; i 0) gsum_out += p_fieldout_data[i]; } } if ( cell_methods == CELL_METHODS_SUM ) { - for(i=0; i 0) - field_out[m].data[i] /= out_area[i]; - else if(out_miss[i] == 1) - field_out[m].data[i] = 0.0; - else - field_out[m].data[i] = missing; +#pragma acc parallel loop present(out_area[:ncells_output_grid], \ + out_miss[:ncells_output_grid], \ + p_fieldout_data[:ncells_output_grid]) + for(int i=0; i 0) { + p_fieldout_data[i] /= out_area[i]; + } + else { + p_fieldout_data[i] = 0.0; + if(out_miss[i] == 0) p_fieldout_data[i] = missing; + } } if( (target_grid) ) { - for(i=0; inxcells; +#pragma acc parallel loop present(minterp_acc->output_parent_cell_index[:ixcells], \ + minterp_acc->input_parent_cell_index[:ixcells], \ + minterp_acc->xcell_area[:ixcells], \ + out_area[:ncells_output_grid])\ + copyin(p_fieldin_area[:ncells_input_grid],\ + p_gridin_area[:ncells_input_grid]) + for(int ix=0; ixoutput_parent_cell_index[ix]; + int ij1 = minterp_acc->input_parent_cell_index[ix]; + double area = minterp_acc->xcell_area[ix]; + if(cell_measures ) out_area[ij2] += (area*p_fieldin_area[ij1]/p_gridin_area[ij1]); + else out_area[ij2] += area; + } +#pragma acc parallel loop present(p_fieldout_data[:ncells_output_grid], out_area[:ncells_output_grid], \ + output_grid[otile].cell_area[:ncells_output_grid]) + for(int i=0; inxc * input_grid->nyc; + double *p_gridin_area = NULL; + double *p_fieldin_area = NULL; + double *p_weight = NULL; + + p_gridin_area = input_grid->cell_area; + p_fieldin_area = field_in->area; + p_weight = input_grid->weight; + + if(cell_methods == CELL_METHODS_SUM) { +#pragma acc parallel loop present(input_area_weight[:ncells_input_grid]) copyin(p_gridin_area[:ncells_input_grid]) + for(int i=0 ; inxcells; + int ncells_input_grid = input_grid->nxc * input_grid->nyc; + int ncells_output_grid = output_grid->nxc * output_grid->nyc; + +#pragma acc data present(minterp_acc[:1], \ + minterp_acc->input_parent_cell_index[:nxcells], \ + minterp_acc->output_parent_cell_index[:nxcells], \ + minterp_acc->xcell_area[:nxcells], \ + input_area_weight[:ncells_input_grid], \ + fieldout_data[:ncells_output_grid], \ + out_area[:ncells_output_grid], \ + out_miss[:ncells_output_grid]) \ + copyin(fieldin_data[:ncells_input_grid]) +#pragma acc parallel loop + for(int ix=0; ixinput_parent_cell_index[ix]; + int ij2 = minterp_acc->output_parent_cell_index[ix]; + double area = minterp_acc->xcell_area[ix]; + + if( fieldin_data[ij1] == missing ) continue; + + area *= input_area_weight[ij1] ; +#pragma acc atomic update + fieldout_data[ij2] += fieldin_data[ij1]*area ; +#pragma acc atomic update + out_area[ij2] += area; + out_miss[ij2] = 1; + } + +} + +void interp_data_order2( const Grid_config *output_grid, const Grid_config *input_grid, + Interp_per_input_tile *minterp_acc, double *input_area_weight, double *fieldin_data, + double *fieldout_data, double *out_area, int *out_miss, + int *grad_mask, double *grad_y, double *grad_x, double missing) +{ + + int nxcells = minterp_acc->nxcells; + int n_halo_cells = 2; + int input_nlon_cells = input_grid->nxc; + int input_nlat_cells = input_grid->nyc; + int input_data_ncells = (input_nlon_cells+n_halo_cells)*(input_nlat_cells+n_halo_cells); + int ncells_input_grid = input_nlon_cells * input_nlat_cells; + + int output_nlon_cells = output_grid->nxc; + int ncells_output_grid = output_nlon_cells * (output_grid->nyc); + +#pragma acc data present( minterp_acc[:1], \ + minterp_acc->input_parent_cell_index[:nxcells], \ + minterp_acc->output_parent_cell_index[:nxcells], \ + minterp_acc->xcell_area[:nxcells], \ + input_area_weight[:ncells_input_grid], \ + fieldout_data[:ncells_output_grid], \ + out_area[:ncells_output_grid], \ + out_miss[:ncells_output_grid]) \ + copyin(fieldin_data[:input_data_ncells], \ + grad_mask[:ncells_input_grid], \ + grad_x[:ncells_input_grid], grad_y[:ncells_input_grid]) +#pragma acc parallel loop + for(int ix=0; ixinput_parent_cell_index[ix]; + int ij2 = minterp_acc->output_parent_cell_index[ix]; + double area = minterp_acc->xcell_area[ix]; + double dx = minterp_acc->dcentroid_lon[ix]; + double dy = minterp_acc->dcentroid_lat[ix]; + + int i1 = ij1%input_nlon_cells; + int j1=ij1/input_nlon_cells; + int data_pt = (j1+1)*(input_nlon_cells+n_halo_cells)+(i1+1); + + if( fieldin_data[data_pt] == missing ) continue; + + area *= input_area_weight[ij1] ; +#pragma acc atomic update + fieldout_data[ij2] += (fieldin_data[data_pt] + (1-grad_mask[ij1])* + (grad_x[ij1]*dx + grad_y[ij1]*dy) )*area; +#pragma acc atomic update + out_area[ij2] += area; + out_miss[ij2] = 1; + } + +} /******************************************************************************* - void do_vector_conserve_interp( ) - doing conservative interpolation +void get_bounding_indices +gets indices for kat that overlap with the ref_grid_lat +TODO: THIS FUNCTION NEEDS A UNIT TEST *******************************************************************************/ -void do_vector_conserve_interp_acc(Interp_config *interp, int varid, int ntiles_in, const Grid_config *grid_in, int ntiles_out, - const Grid_config *grid_out, const Field_config *u_in, const Field_config *v_in, - Field_config *u_out, Field_config *v_out, unsigned int opcode) +void get_bounding_indices_acc(const int ref_nlon_cells, const int ref_nlat_cells, + const int nlon_cells, const int nlat_cells, + const double *ref_grid_lat, const double *grid_lat, + int *overlap_starts_here_index, int *overlap_ends_here_index) { - int nx1, ny1, nx2, ny2, i1, j1, i2, j2, tile, n, m, i; - double area, missing, tmp_x, tmp_y; - double *out_area; - - missing = u_in->var[varid].missing; - /* first rotate input data */ - for(n = 0; n < ntiles_in; n++) { - if(grid_in[n].rotate) { - nx1 = grid_in[n].nx; - ny1 = grid_in[n].ny; - for(i=0; i ref_min_lat ) overlap_starts_here_index_tmp = min(overlap_starts_here_index_tmp, jlat); + if( lat < ref_max_lat ) overlap_ends_here_index_tmp = max(overlap_ends_here_index_tmp, jlat); } } - for(m=0; m 0) { - u_out[m].data[i] /= grid_out[m].area[i]; - v_out[m].data[i] /= grid_out[m].area[i]; - } - else { - u_out[m].data[i] = missing; - v_out[m].data[i] = missing; - } - } - } - else { - for(i=0; i 0) { - u_out[m].data[i] /= out_area[i]; - v_out[m].data[i] /= out_area[i]; - } - else { - u_out[m].data[i] = missing; - v_out[m].data[i] = missing; - } - } - } - /* rotate the data if needed */ - if(grid_out[m].rotate) { - for(i=0; iinput_parent_cell_index = (int *)malloc(nxcells *sizeof(int)); + interp_per_itile->output_parent_cell_index = (int *)malloc(nxcells *sizeof(int)); + interp_per_itile->xcell_area = (double *)malloc(nxcells * sizeof(double)); + + if(opcode & CONSERVE_ORDER2) { + interp_per_itile->dcentroid_lon = (double *)malloc(nxcells*sizeof(double)); + interp_per_itile->dcentroid_lat = (double *)malloc(nxcells*sizeof(double)); + } + +#pragma acc enter data create(interp_per_itile) +#pragma acc enter data create(interp_per_itile->input_parent_cell_index[:nxcells], \ + interp_per_itile->output_parent_cell_index[:nxcells], \ + interp_per_itile->xcell_area[:nxcells]) + if(opcode & CONSERVE_ORDER2) { +#pragma acc enter data create(interp_per_itile->dcentroid_lon[:nxcells], \ + interp_per_itile->dcentroid_lat[:nxcells]) } -}; /* do_vector_conserve_interp */ +} diff --git a/tools/fregrid_acc/conserve_interp_acc.h b/tools/fregrid_acc/conserve_interp_acc.h index 071a8123..a5020c6e 100644 --- a/tools/fregrid_acc/conserve_interp_acc.h +++ b/tools/fregrid_acc/conserve_interp_acc.h @@ -20,15 +20,44 @@ #ifndef CONSERVE_INTERP_ACC_H_ #define CONSERVE_INTERP_ACC_H_ -#include "globals.h" +#include "globals_acc.h" -void setup_conserve_interp_acc(int ntiles_in, const Grid_config *grid_in, int ntiles_out, - Grid_config *grid_out, Interp_config *interp, unsigned int opcode); -void do_scalar_conserve_interp_acc(Interp_config *interp, int varid, int ntiles_in, const Grid_config *grid_in, +void setup_conserve_interp_acc(int ntiles_in, Grid_config *grid_in, int ntiles_out, + Grid_config *grid_out, Interp_config_acc *interp_acc, unsigned int opcode); + +void do_scalar_conserve_interp_acc(Interp_config_acc *interp_acc, int varid, int ntiles_in, const Grid_config *grid_in, int ntiles_out, const Grid_config *grid_out, const Field_config *field_in, - Field_config *field_out, unsigned int opcode, int nz); -void do_vector_conserve_interp_acc(Interp_config *interp, int varid, int ntiles_in, const Grid_config *grid_in, int ntiles_out, - const Grid_config *grid_out, const Field_config *u_in, const Field_config *v_in, - Field_config *u_out, Field_config *v_out, unsigned int opcode); + Field_config *field_out, unsigned int opcode); + +void read_remap_file_acc(int ntiles_input_grid, int ntiles_output_grid, + Grid_config *output_grid, Grid_config *input_grid, + Interp_config_acc *interp_acc, unsigned int opcode); + +void write_remap_file(const int ntiles_out, const int ntiles_in, Grid_config *output_grid, + Grid_config *input_grid, Interp_config_acc *interp_acc, unsigned int opcode); + +void check_area_conservation(const int ntiles_output_grid, const int ntiles_input_grid, Grid_config *output_grid, + Interp_config_acc *interp_acc); + +void get_input_area_weight(const int weights_exist, const int cell_measures, const int cell_methods, + const Field_config *field_in, const Grid_config *input_grid, + double *input_area_weight); + +void interp_data_order1(const Grid_config *output_grid, const Grid_config *input_grid, + Interp_per_input_tile *interp_for_itile, double *input_area_weight, double *fieldin_data, + double *fieldout_data, double *out_area, int *out_miss, double missing); + +void interp_data_order2( const Grid_config *output_grid, const Grid_config *input_grid, + Interp_per_input_tile *interp_for_itile, double *input_area_weight, double *fieldin_data, + double *fieldout_data, double *out_area, int *out_miss, + int *grad_mask, double *grad_y, double *grad_x, double missing); + +void get_bounding_indices_acc(const int ref_nlon_cells, const int ref_nlat_cells, + const int nlon_cells, const int nlat_cells, + const double *ref_grid_lat, const double *grid_lat, + int *overlap_starts_here_index, int *overlap_ends_here_index); + +void create_interp_acc_itile_arrays_on_device_acc(const int nxcells, const unsigned int opcode, + Interp_per_input_tile *interp_per_itile); #endif diff --git a/tools/fregrid_acc/fregrid_acc.c b/tools/fregrid_acc/fregrid_acc.c index fd1f11d5..fe1bbf3e 100644 --- a/tools/fregrid_acc/fregrid_acc.c +++ b/tools/fregrid_acc/fregrid_acc.c @@ -39,21 +39,25 @@ 675 Mass Ave, Cambridge, MA 02139, USA. ----------------------------------------------------------------------- */ + +// TODO: vector and monotonic interpolation have not been implemented yet. +// TODO: great_circle first order exchange grid creation has not been implemented yet. + #include #include #include #include #include #include -#include "globals.h" -#include "constant.h" +#include "globals_acc.h" #include "read_mosaic.h" #include "mpp_io.h" #include "mpp.h" #include "mosaic_util.h" -#include "conserve_interp.h" +#include "conserve_interp_acc.h" #include "bilinear_interp.h" #include "fregrid_util.h" +#include "fregrid_utils_acc.h" char *usage[] = { "", @@ -65,7 +69,7 @@ char *usage[] = { " [--LstepBegin #integer] [--LstepEnd #integer] ", " [--output_file output_file] [--input_dir input_dir] ", " [--output_dir output_dir] [--remap_file remap_file] ", - " [--interp_method method] [--grid_type grid_type] [--test_case test_case] ", + " [--interp_method method] [--grid_type grid_type] ", " [--symmetry] [--target_grid] [--finer_step #] [--fill_missing] ", " [--center_y] [--check_conserve] [--weight_file weight_file] ", " [--weight_field --weight_field] [--dst_vgrid dst_vgrid] ", @@ -182,7 +186,7 @@ char *usage[] = { " ", "--interp_method interp_method specify the remapping algorithm to be used. Default is ", " 'conserve_order1'. Currently only 'conserve_order1', ", - " 'conserve_order2', 'conserve_order2_monotonic' and ", + " 'conserve_order2', and ", " 'bilinear' remapping scheme are ", " implemented in this tool. The bilinear scheme can only ", " be used to remap data from cubic grid to regular latlon ", @@ -191,8 +195,6 @@ char *usage[] = { " will be located at the center of cell or bound of the ", " cell depending on the setting of y_center. ", " ", - "--test_case test_case specify the test function to be used for testing. ", - " ", "--grid_type grid_type specify the vector field grid location. default is ", " AGRID and only AGRID is implemented yet. ", " ", @@ -221,9 +223,6 @@ char *usage[] = { " The area sum will be printed out for input and output ", " mosaic. ", " ", - "--monotonic When specified, use monotonic interpolation when ", - " interp_method is 'conserve_order2'. ", - " ", "--weight_file Specify the filename that store weight_field. The ", " suffix '.tile#.nc' should not present for multiple-tile ", " files. weight_field is used to adjust the source weight.", @@ -258,8 +257,6 @@ char *usage[] = { " format is not specified, will use the format of ", " input_file. ", " ", - "--nthreads # Specify number of OpenMP threads. ", - " ", "--deflation # If using NetCDF4 , use deflation of level #. ", " Defaults to input file settings. ", " ", @@ -302,8 +299,6 @@ int main(int argc, char* argv[]) char scalar_name_remap[NVAR][STRING]; char u_name [NVAR] [STRING]; char v_name [NVAR] [STRING]; - char *test_case = NULL; - double test_param = 1; char *associated_file_dir = NULL; int check_conserve = 0; /* 0 means no check */ double lonbegin = 0, lonend = 360; @@ -319,7 +314,6 @@ int main(int argc, char* argv[]) unsigned int nscalar_orig; int option_index, c, i, n, m, l; char entry[MAXSTRING]; /* should be long enough */ - char txt[STRING]; char history[MAXATT]; int fill_missing = 0; int extrapolate = 0; @@ -352,9 +346,8 @@ int main(int argc, char* argv[]) File_config *file2_in = NULL; /* store input file information */ File_config *file2_out = NULL; /* store output file information */ Bound_config *bound_T = NULL; /* store halo update information for T-cell*/ - Interp_config *interp = NULL; /* store remapping information */ + Interp_config_acc *interp_acc = NULL; /* store remapping information */ int save_weight_only = 0; - int nthreads = 1; double time_get_in_grid=0, time_get_out_grid=0, time_get_input=0; double time_setup_interp=0, time_do_interp=0, time_write=0; @@ -371,9 +364,7 @@ int main(int argc, char* argv[]) {"input_file", required_argument, NULL, 'e'}, {"output_file", required_argument, NULL, 'f'}, {"remap_file", required_argument, NULL, 'g'}, - {"test_case", required_argument, NULL, 'i'}, {"interp_method", required_argument, NULL, 'j'}, - {"test_parameter", required_argument, NULL, 'k'}, {"symmetry", no_argument, NULL, 'l'}, {"grid_type", required_argument, NULL, 'm'}, {"target_grid", no_argument, NULL, 'n'}, @@ -401,7 +392,6 @@ int main(int argc, char* argv[]) {"stop_crit", required_argument, NULL, 'N'}, {"standard_dimension", no_argument, NULL, 'O'}, {"debug", no_argument, NULL, 'P'}, - {"nthreads", required_argument, NULL, 'Q'}, {"associated_file_dir", required_argument, NULL, 'R'}, {"deflation", required_argument, NULL, 'S'}, {"shuffle", required_argument, NULL, 'T'}, @@ -459,12 +449,6 @@ int main(int argc, char* argv[]) case 'j': strcpy(interp_method, optarg); break; - case 'i': - test_case = optarg; - break; - case 'k': - test_param = atof(optarg); - break; case 'l': opcode |= SYMMETRY; break; @@ -543,9 +527,6 @@ int main(int argc, char* argv[]) case 'P': debug = 1; break; - case 'Q': - nthreads = atoi(optarg); - break; case 'R': associated_file_dir = optarg; break; @@ -588,29 +569,26 @@ int main(int argc, char* argv[]) if(mpp_pe() == mpp_root_pe())printf("****fregrid: second order conservative scheme will be used for regridding.\n"); opcode |= CONSERVE_ORDER2; } - else if(!strcmp(interp_method, "conserve_order2_monotonic") ) { - if(mpp_pe() == mpp_root_pe())printf("****fregrid: second order monotonic conservative scheme will be used for regridding.\n"); - opcode |= CONSERVE_ORDER2; - opcode |= MONOTONIC; - } else if(!strcmp(interp_method, "bilinear") ) { if(mpp_pe() == mpp_root_pe())printf("****fregrid: bilinear remapping scheme will be used for regridding.\n"); opcode |= BILINEAR; } else - mpp_error("fregrid: interp_method must be 'conserve_order1', 'conserve_order2', 'conserve_order2_monotonic' or 'bilinear'"); + mpp_error("fregrid: interp_method must be 'conserve_order1', 'conserve_order2', or 'bilinear'"); save_weight_only = 0; if( nfiles == 0) { if(nvector > 0 || nscalar > 0 || nvector2 > 0) mpp_error("fregrid: when --input_file is not specified, --scalar_field, --u_field and --v_field should also not be specified"); - if(!remap_file) mpp_error("fregrid: when --input_file is not specified, remap_file must be specified to save weight information"); + if(!remap_file) + mpp_error("fregrid: when --input_file is not specified, remap_file must be specified to save weight information"); save_weight_only = 1; if(mpp_pe()==mpp_root_pe())printf("NOTE: No input file specified in this run, no data file will be regridded " "and only weight information is calculated.\n"); } else if( nfiles == 1 || nfiles ==2) { - if( nvector != nvector2 ) mpp_error("fregrid: number of fields specified in u_field must be the same as specified in v_field"); + if( nvector != nvector2 ) + mpp_error("fregrid: number of fields specified in u_field must be the same as specified in v_field"); if( nscalar+nvector==0 ) mpp_error("fregrid: both scalar_field and vector_field are not specified"); /* when nvector =2 and nscalar=0, nfiles can be 2 otherwise nfiles must be 1 */ if( nscalar && nfiles != 1 ) @@ -637,7 +615,7 @@ int main(int argc, char* argv[]) if(nvector >0) mpp_error("fregrid: weight_field should not be specified for vector interpolation, contact developer"); if(!weight_file) { - if(nfiles==0) mpp_error("fregrid: weight_field is specified, but both weight_file and input_file are not specified"); + if(nfiles==0) mpp_error("fregrid: weight_field is specified, but weight_file and input_file are not specified"); if(dir_in) sprintf(wt_file_obj, "%s/%s", dir_in, input_file[0]); else @@ -648,16 +626,12 @@ int main(int argc, char* argv[]) if(nvector > 0) { opcode |= VECTOR; - if(grid_type == AGRID) - opcode |= AGRID; - else if(grid_type == BGRID) - opcode |= BGRID; + if(grid_type == AGRID) opcode |= AGRID; + else if(grid_type == BGRID) opcode |= BGRID; } - if (shuffle < -1 || shuffle > 1) - mpp_error("fregrid: shuffle must be 0 (off) or 1 (on)"); - if (deflation < -1 || deflation > 9) - mpp_error("fregrid: deflation must be between 0 (off) and 9"); + if (shuffle < -1 || shuffle > 1) mpp_error("fregrid: shuffle must be 0 (off) or 1 (on)"); + if (deflation < -1 || deflation > 9) mpp_error("fregrid: deflation must be between 0 (off) and 9"); /* define history to be the history in the grid file */ strcpy(history,argv[0]); @@ -667,11 +641,11 @@ int main(int argc, char* argv[]) if(strlen(argv[i]) > MAXENTRY) { /* limit the size of each entry, here we are assume the only entry that is longer than MAXENTRY= 256 is the option --scalar_field --u_field and v_field */ if(strcmp(argv[i-1], "--scalar_field") && strcmp(argv[i-1], "--u_field") && strcmp(argv[i-1], "--v_field") ) - mpp_error("fregrid: the entry ( is not scalar_field, u_field, v_field ) is too long, need to increase parameter MAXENTRY"); + mpp_error("fregrid: the entry ( is not scalar_field, u_field, v_field )\ + is too long, need to increase parameter MAXENTRY"); strcat(history, "(**please see the field list in this file**)" ); } - else - strcat(history, argv[i]); + else strcat(history, argv[i]); } /* get the mosaic information of input and output mosaic*/ @@ -683,33 +657,26 @@ int main(int argc, char* argv[]) if( ntiles_in != 6 && (opcode & CONSERVE_ORDER2) ) mpp_error("fregrid: when the input grid is not cubic sphere grid, interp_method can not be conserve_order2"); + ntiles_out = 1; if(mosaic_out) { fid = mpp_open(mosaic_out, MPP_READ); ntiles_out = mpp_get_dimlen(fid, "ntiles"); mpp_close(fid); } - else - ntiles_out = 1; - if(test_case) { - if(nfiles != 1) mpp_error("fregrid: when test_case is specified, nfiles should be 1"); - sprintf(output_file[0], "%s.%s.output", test_case, interp_method); - } if(check_conserve) opcode |= CHECK_CONSERVE; if( opcode & STANDARD_DIMENSION ) printf("fregrid: --standard_dimension is set\n"); if( opcode & BILINEAR ) { - int ncontact; - ncontact = read_mosaic_ncontacts(mosaic_in); + int ncontact = read_mosaic_ncontacts(mosaic_in); if( nlon == 0 || nlat == 0) mpp_error("fregrid: when interp_method is bilinear, nlon and nlat should be specified"); if(ntiles_in != 6) mpp_error("fregrid: when interp_method is bilinear, the input mosaic should be 6 tile cubic grid"); if(ncontact !=12) mpp_error("fregrid: when interp_method is bilinear, the input mosaic should be 12 contact cubic grid"); if(mpp_npes() > 1) mpp_error("fregrid: parallel is not implemented for bilinear remapping"); } - else - y_at_center = 1; + else y_at_center = 1; if(extrapolate) opcode |= EXTRAPOLATE; @@ -717,7 +684,14 @@ int main(int argc, char* argv[]) grid_in = (Grid_config *)malloc(ntiles_in *sizeof(Grid_config)); grid_out = (Grid_config *)malloc(ntiles_out*sizeof(Grid_config)); bound_T = (Bound_config *)malloc(ntiles_in *sizeof(Bound_config)); - interp = (Interp_config *)malloc(ntiles_out*sizeof(Interp_config)); + //If statement will be removed once bilinear is on the GPU + if( opcode & BILINEAR ) interp_acc = (Interp_config_acc *)malloc(ntiles_out*sizeof(Interp_config_acc)); + else { + interp_acc = (Interp_config_acc *)malloc(ntiles_out*sizeof(Interp_config_acc)); + for(int i=0 ; i EPSLN10 || fabs( grid_in[0].latt[ind0]-grid_in[0].latt[ind1] ) > EPSLN10 ) mpp_error("fregrid: extrapolate is limited to lat-lon grid"); - } + } } /* when vertical_interp is set, extrapolate must be set */ @@ -781,7 +753,8 @@ int main(int argc, char* argv[]) if(extrapolate) mpp_error("fregrid: extrapolate is not supported for vector fields"); } - if(remap_file) set_remap_file(ntiles_out, mosaic_out, remap_file, interp, &opcode, save_weight_only); + if(opcode & BILINEAR && remap_file) mpp_error("does not work yet with bilinear"); + if(remap_file) set_remap_file_acc(ntiles_out, mosaic_out, remap_file, interp_acc, &opcode, save_weight_only); if(!save_weight_only) { file_in = (File_config *)malloc(ntiles_in *sizeof(File_config)); @@ -819,9 +792,7 @@ int main(int argc, char* argv[]) /* filter field with interp_method = "none" */ nscalar = 0; for(n=0; n= 0){ - reset_in_format( in_format_0); - }else{ - printf("WARNING: fregrid could not set in_format"); - } + if(format != NULL) set_in_format(format); + else if (in_format_0 >= 0) reset_in_format( in_format_0); + else printf("WARNING: fregrid could not set in_format"); set_output_metadata(ntiles_in, nfiles, file_in, file2_in, scalar_in, u_in, v_in, ntiles_out, file_out, file2_out, scalar_out, u_out, v_out, grid_out, &vgrid_out, history, tagname, opcode, @@ -933,27 +900,22 @@ int main(int argc, char* argv[]) double dlon_in, dlat_in; double lonbegin_in, latbegin_in; /* when dlon_in is 0, bilinear_interp will use the default 2*M_PI */ - if(fabs(lonend-lonbegin-360) < EPSLN10) - dlon_in = M_PI+M_PI; - else - dlon_in = (lonend-lonbegin)*D2R; - if(fabs(latend-latbegin-180) < EPSLN10) - dlat_in = M_PI; - else - dlat_in = (latend-latbegin)*D2R; - if(fabs(lonbegin) < EPSLN10) - lonbegin_in = 0.0; - else - lonbegin_in = lonbegin*D2R; - if(fabs(latbegin+90) < EPSLN10) - latbegin_in = -0.5*M_PI; - else - latbegin_in = latbegin*D2R; - - setup_bilinear_interp(ntiles_in, grid_in, ntiles_out, grid_out, interp, opcode, dlon_in, dlat_in, lonbegin_in, latbegin_in ); + if(fabs(lonend-lonbegin-360) < EPSLN10) dlon_in = M_PI+M_PI; + else dlon_in = (lonend-lonbegin)*D2R; + + if(fabs(latend-latbegin-180) < EPSLN10) dlat_in = M_PI; + else dlat_in = (latend-latbegin)*D2R; + + if(fabs(lonbegin) < EPSLN10) lonbegin_in = 0.0; + else lonbegin_in = lonbegin*D2R; + + if(fabs(latbegin+90) < EPSLN10) latbegin_in = -0.5*M_PI; + else latbegin_in = latbegin*D2R; + + //setup_bilinear_interp(ntiles_in, grid_in, ntiles_out, grid_out, interp, opcode, dlon_in, dlat_in, lonbegin_in, latbegin_in ); } - else - setup_conserve_interp(ntiles_in, grid_in, ntiles_out, grid_out, interp, opcode); + else setup_conserve_interp_acc(ntiles_in, grid_in, ntiles_out, grid_out, interp_acc, opcode); + if(debug) { time_end = clock(); time_setup_interp = 1.0*(time_end - time_start)/CLOCKS_PER_SEC; @@ -967,9 +929,7 @@ int main(int argc, char* argv[]) if(save_weight_only) { if(mpp_pe() == mpp_root_pe() ) { printf("NOTE: Successfully running fregrid and the following files which store weight information are generated.\n"); - for(n=0; nvar->nz != 1) mpp_error("fregrid: when test_case is specified, number of vertical level must be 1"); - file_in->nt = 1; - file_out->nt = 1; - } - /* Then doing the regridding */ for(m=0; mnt; m++) { - int memsize, level_z, level_n, level_t; + int level_z, level_n, level_t; write_output_time(ntiles_out, file_out, m); if(nfiles > 1) write_output_time(ntiles_out, file2_out, m); @@ -1009,15 +961,19 @@ int main(int argc, char* argv[]) if( !scalar_in->var[l].has_taxis && m>0) continue; if( !scalar_in->var[l].do_regrid ) continue; level_t = m + scalar_in->var[l].lstart; + /*--- to reduce memory usage, we are only do remapping for on horizontal level one time */ for(level_n =0; level_n < scalar_in->var[l].nn; level_n++) { if(extrapolate) { get_input_data(ntiles_in, scalar_in, grid_in, bound_T, l, -1, level_n, level_t, extrapolate, stop_crit); allocate_field_data(ntiles_out, scalar_out, grid_out, scalar_in->var[l].nz); - if( opcode & BILINEAR ) - do_scalar_bilinear_interp(interp, l, ntiles_in, grid_in, grid_out, scalar_in, scalar_out, finer_step, fill_missing); + if( opcode & BILINEAR ) { + printf("BILINEAR IS NOT SUPPORTED YET"); + //do_scalar_bilinear_interp(interp, l, ntiles_in, grid_in, grid_out, scalar_in, scalar_out, finer_step, fill_missing); + } else - do_scalar_conserve_interp(interp, l, ntiles_in, grid_in, ntiles_out, grid_out, scalar_in, scalar_out, opcode, scalar_in->var[l].nz); + do_scalar_conserve_interp_acc(interp_acc, l, ntiles_in, grid_in, ntiles_out, grid_out, scalar_in, + scalar_out, opcode); if(vertical_interp) do_vertical_interp(&vgrid_in, &vgrid_out, grid_out, scalar_out, l); write_field_data(ntiles_out, scalar_out, grid_out, l, -1, level_n, m); if(scalar_out->var[l].interp_method == CONSERVE_ORDER2) { @@ -1028,51 +984,51 @@ int main(int argc, char* argv[]) } for(n=0; nvar[l].kstart; level_z <= scalar_in->var[l].kend; level_z++) - { - if(debug) time_start = clock(); - if(test_case) - get_test_input_data(test_case, test_param, ntiles_in, scalar_in, grid_in, bound_T, opcode); - else - get_input_data(ntiles_in, scalar_in, grid_in, bound_T, l, level_z, level_n, level_t, extrapolate, stop_crit); - if(debug) { - time_end = clock(); - time_get_input += 1.0*(time_end - time_start)/CLOCKS_PER_SEC; - } + for(level_z=scalar_in->var[l].kstart; level_z <= scalar_in->var[l].kend; level_z++) { + if(debug) time_start = clock(); + get_input_data(ntiles_in, scalar_in, grid_in, bound_T, l, level_z, level_n, level_t, extrapolate, stop_crit); + if(debug) { + time_end = clock(); + time_get_input += 1.0*(time_end - time_start)/CLOCKS_PER_SEC; + } - allocate_field_data(ntiles_out, scalar_out, grid_out, 1); - if(debug) time_start = clock(); - if( opcode & BILINEAR ) - do_scalar_bilinear_interp(interp, l, ntiles_in, grid_in, grid_out, scalar_in, scalar_out, finer_step, fill_missing); - else - do_scalar_conserve_interp(interp, l, ntiles_in, grid_in, ntiles_out, grid_out, scalar_in, scalar_out, opcode,1); - if(debug) { - time_end = clock(); - time_do_interp += 1.0*(time_end - time_start)/CLOCKS_PER_SEC; - } + allocate_field_data(ntiles_out, scalar_out, grid_out, 1); + if(debug) time_start = clock(); + if( opcode & BILINEAR ) { + printf("BILINEAR IS NOT SUPPORTED YET"); + //do_scalar_bilinear_interp(interp, l, ntiles_in, grid_in, grid_out, scalar_in, scalar_out, finer_step, fill_missing); + } + else + do_scalar_conserve_interp_acc(interp_acc, l, ntiles_in, grid_in, ntiles_out, grid_out, + scalar_in, scalar_out, opcode); + if(debug) { + time_end = clock(); + time_do_interp += 1.0*(time_end - time_start)/CLOCKS_PER_SEC; + } - if(debug) time_start = clock(); - write_field_data(ntiles_out, scalar_out, grid_out, l, level_z, level_n, m); - if(debug) { - time_end = clock(); - time_write += 1.0*(time_end - time_start)/CLOCKS_PER_SEC; - } - if(scalar_out->var[l].interp_method == CONSERVE_ORDER2) { - for(n=0; nvar[l].interp_method == CONSERVE_ORDER2) { + for(n=0; n0) continue; @@ -1081,11 +1037,12 @@ int main(int argc, char* argv[]) get_input_data(ntiles_in, v_in, grid_in, bound_T, l, level_z, level_n, level_t, extrapolate, stop_crit); allocate_field_data(ntiles_out, u_out, grid_out, u_in[n].var[l].nz); allocate_field_data(ntiles_out, v_out, grid_out, u_in[n].var[l].nz); - if( opcode & BILINEAR ) - do_vector_bilinear_interp(interp, l, ntiles_in, grid_in, ntiles_out, grid_out, u_in, v_in, u_out, v_out, finer_step, fill_missing); + if( opcode & BILINEAR ) { + printf("BILINEAR IS NOT SUPPORTED YET"); + //do_vector_bilinear_interp(interp, l, ntiles_in, grid_in, ntiles_out, grid_out, u_in, v_in, u_out, v_out, finer_step, fill_missing); + } else - do_vector_conserve_interp(interp, l, ntiles_in, grid_in, ntiles_out, grid_out, u_in, v_in, u_out, v_out, opcode); - + mpp_error("vector conservative interpolation has not been implemented yet"); write_field_data(ntiles_out, u_out, grid_out, l, level_z, level_n, m); write_field_data(ntiles_out, v_out, grid_out, l, level_z, level_n, m); for(n=0; n. + **********************************************************************/ +#include +#include +#include +#include +#include "fregrid_util.h" +#include "mpp.h" +#include "mpp_io.h" +#include "globals_acc.h" + +/******************************************************************************* +void set_remap_file( ) +*******************************************************************************/ +void set_remap_file_acc( int ntiles, const char *mosaic_file, const char *remap_file, Interp_config_acc *interp_acc, + unsigned int *opcode, int save_weight_only) +{ + int i, len, m, fid, vid; + size_t start[4], nread[4]; + char str1[STRING], tilename[STRING]; + int file_exist; + + if(!remap_file) return; + + for(i=0; i<4; i++) { + start[i] = 0; nread[i] = 1; + } + nread[1] = STRING; + + len = strlen(remap_file); + if(len >= STRING) mpp_error("setoutput_remap_file(fregrid_util): length of remap_file should be less than STRING"); + if( strcmp(remap_file+len-3, ".nc")==0 ) { + strncpy(str1, remap_file, len-3); + str1[len-3] = 0; + } + else + strcpy(str1, remap_file); + + (*opcode) |= WRITE; + + if(ntiles>1) { + fid = mpp_open(mosaic_file, MPP_READ); + vid = mpp_get_varid(fid, "gridtiles"); + } + + for(m=0; m 1) { + start[0] = m; + mpp_get_var_value_block(fid, vid, start, nread, tilename); + if(strlen(str1) + strlen(tilename) > STRING -5) mpp_error("set_output_remap_file(fregrid_util): length of str1 + " + "length of tilename should be no greater than STRING-5"); + sprintf(interp_acc[m].remap_file, "%s.%s.nc", str1, tilename); + } + else + sprintf(interp_acc[m].remap_file, "%s.nc", str1); + /* check interp_acc file to be read (=1) or write ( = 2) */ + if(!save_weight_only && mpp_file_exist(interp_acc[m].remap_file)) { + (*opcode) |= READ; + interp_acc[m].file_exist = 1; + } + + } + + if(ntiles>1) mpp_close(fid); + +};/* set_remap_file */ diff --git a/tools/fregrid_acc/fregrid_utils_acc.h b/tools/fregrid_acc/fregrid_utils_acc.h new file mode 100644 index 00000000..5bc8fc5c --- /dev/null +++ b/tools/fregrid_acc/fregrid_utils_acc.h @@ -0,0 +1,26 @@ +/*********************************************************************** + * GNU Lesser General Public License + * + * This file is part of the GFDL FRE NetCDF tools package (FRE-NCTools). + * + * FRE-NCtools is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or (at + * your option) any later version. + * + * FRE-NCtools is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FRE-NCTools. If not, see + * . + **********************************************************************/ +#ifndef FREGRID_UTILS_ACC_H +#define FREGRID_UTILS_ACC_H + +void set_remap_file_acc( int ntiles, const char *mosaic_file, const char *remap_file, Interp_config_acc *interp_acc, + unsigned int *opcode, int save_weight_only); + +#endif diff --git a/tools/fregrid_acc/interp_utils_acc.c b/tools/fregrid_acc/interp_utils_acc.c new file mode 100644 index 00000000..bf4420d8 --- /dev/null +++ b/tools/fregrid_acc/interp_utils_acc.c @@ -0,0 +1,101 @@ +/*********************************************************************** + * GNU Lesser General Public License + * + * This file is part of the GFDL FRE NetCDF tools package (FRE-NCTools). + * + * FRE-NCtools is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or (at + * your option) any later version. + * + * FRE-NCtools is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FRE-NCTools. If not, see + * . + **********************************************************************/ +#include +#include +#include +#include "globals_acc.h" +#include "general_utils_acc.h" + +/******************************************************************************* +void copy_grid_to_device( const int itile, Grid_config *grid ) +Copies lat lon coordinates to device +*******************************************************************************/ +void copy_grid_to_device_acc( const int npoints, const double *lat, const double *lon ) +{ + +#pragma acc enter data copyin(lon[:npoints], lat[:npoints]) + +} + +void delete_grid_from_device_acc( const int npoints, const double *lat, const double *lon ) +{ + +#pragma acc exit data delete(lat[:npoints], lon[:npoints]) + +} + +/******************************************************************************* +void copy_interp_to_device( Interp_config *interp ) +Copies the interp struct to device +*******************************************************************************/ +void copy_interp_to_device_acc( const int ntiles_in, const int ntiles_out, const Interp_config_acc *interp_acc, + const unsigned int opcode ) +{ + +#pragma acc enter data copyin(interp_acc[:ntiles_out]) + for(int otile=0 ; otile. + **********************************************************************/ +#ifndef FREGRID_UTILS_ACC_H_ +#define FREGRID_UTILS_ACC_H_ + +#include "globals_acc.h" + +void copy_grid_to_device_acc( const int npoints, const double *lat, const double *lon ); + +void delete_grid_from_device_acc( const int npoints, const double *lat, const double *lon ); + +void copy_interp_to_device_acc( const int ntiles_in, const int ntiles_out, const Interp_config_acc *interp_acc, + const unsigned int opcode ); + +void get_input_grid_mask_acc(const int mask_size, double **input_grid_mask); + +void free_input_grid_mask_acc(const int mask_size, double **input_grid_mask); + +void create_interp_per_intile_arrays_on_device_acc(const int nxcells, const unsigned int opcode, + Interp_per_input_tile *interp_per_itile); + +#endif diff --git a/tools/libfrencutils/constant.h b/tools/libfrencutils/constant.h index 82cc19e6..e12edeb1 100644 --- a/tools/libfrencutils/constant.h +++ b/tools/libfrencutils/constant.h @@ -43,5 +43,6 @@ #define TPI (2.0*M_PI) #define HPI (0.5*M_PI) -#endif +#define GAREA (4*M_PI*RADIUS*RADIUS) +#endif diff --git a/tools/libfrencutils_acc/Makefile.am b/tools/libfrencutils_acc/Makefile.am index 7bafc27f..6b8d25c0 100644 --- a/tools/libfrencutils_acc/Makefile.am +++ b/tools/libfrencutils_acc/Makefile.am @@ -22,6 +22,10 @@ noinst_LIBRARIES = libfrencutils_acc.a AM_CFLAGS = $(NETCDF_CFLAGS) -I$(top_srcdir)/tools/libfrencutils -acc libfrencutils_acc_a_SOURCES = create_xgrid_acc.c \ - create_xgrid_acc.h + create_xgrid_acc.h \ + create_xgrid_utils_acc.c \ + create_xgrid_utils_acc.h \ + general_utils_acc.c \ + general_utils_acc.h libfrencutils_acc_a_LIBADD = $(top_builddir)/tools/libfrencutils/libfrencutils.a diff --git a/tools/libfrencutils_acc/create_xgrid_acc.c b/tools/libfrencutils_acc/create_xgrid_acc.c index 14d7bea4..64b24eca 100644 --- a/tools/libfrencutils_acc/create_xgrid_acc.c +++ b/tools/libfrencutils_acc/create_xgrid_acc.c @@ -20,1028 +20,479 @@ #include #include #include -#include "mosaic_util.h" -#include "create_xgrid.h" +#include +#include "general_utils_acc.h" #include "create_xgrid_acc.h" -#include "constant.h" - -#define AREA_RATIO_THRESH (1.e-6) -#define MASK_THRESH (0.5) +#include "create_xgrid_utils_acc.h" +#include "globals_acc.h" /******************************************************************************* -void get_grid_area(const int *nlon, const int *nlat, const double *lon, const double *lat, const double *area) - return the grid area. +void get_upbound_nxcells_2dx2d_acc +This function computes the upperbound to nxgrid. This upper bound will be used +to malloc arrays used in create_xgrid *******************************************************************************/ -void get_grid_area_acc(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area) -{ - int nx, ny, nxp, i, j, n_in; - double x_in[20], y_in[20]; - - nx = *nlon; - ny = *nlat; - nxp = nx + 1; - - for(j=0; j MASK_THRESH ) { + + int i_approx_xcells_per_ij1=0; + int ij2_min=output_grid_ncells, ij2_max=0; + double input_cell_lon_vertices[MV], input_cell_lat_vertices[MV]; + + get_cell_vertices_acc(ij1, nlon_input_cells, input_grid_lon, input_grid_lat, + input_cell_lon_vertices, input_cell_lat_vertices); + + double input_cell_lat_min = minval_double_acc(4, input_cell_lat_vertices); + double input_cell_lat_max = maxval_double_acc(4, input_cell_lat_vertices); + int nvertices = fix_lon_acc(input_cell_lon_vertices, input_cell_lat_vertices, 4, M_PI); + double input_cell_lon_min = minval_double_acc(nvertices, input_cell_lon_vertices); + double input_cell_lon_max = maxval_double_acc(nvertices, input_cell_lon_vertices); + double input_cell_lon_cent = avgval_double_acc(nvertices, input_cell_lon_vertices); + + approx_xcells_per_ij1[ij1]=0; + +#pragma acc loop independent reduction(+:upbound_nxcells) reduction(+:i_approx_xcells_per_ij1) \ + reduction(min:ij2_min) reduction(max:ij2_max) + + for(int ij2=0; ij2lat_min[ij2] >= input_cell_lat_max) continue; + if(output_grid_cells->lat_max[ij2] <= input_cell_lat_min) continue; + + dlon_cent = output_grid_cells->lon_cent[ij2] - input_cell_lon_cent; + if(dlon_cent < -M_PI) rotate = +TPI; + if(dlon_cent > M_PI) rotate = -TPI; + + // adjust according to input_grid_lon_cent + // TODO: breakup grid into quadrants to avoid if statements? + output_cell_lon_min = output_grid_cells->lon_min[ij2] + rotate; + output_cell_lon_max = output_grid_cells->lon_max[ij2] + rotate; + + //output_cell_lon should in the same range as input_cell_lon after lon_fix, + // so no need to consider cyclic condition + if(output_cell_lon_min >= input_cell_lon_max ) continue; + if(output_cell_lon_max <= input_cell_lon_min ) continue; + + //Note, the check for AREA_RATIO_THRESH has been removed + //Thus, the computed value of upbound_nxcells will be equal to or greater than nxgrid + i_approx_xcells_per_ij1++; + upbound_nxcells++; + ij2_min = min(ij2_min, ij2); + ij2_max = max(ij2_max, ij2); + + } //ij2 + approx_xcells_per_ij1[ij1] = i_approx_xcells_per_ij1; + ij2_start[ij1] = ij2_min ; + ij2_end[ij1] = ij2_max; + + } //mask + } //ij1 + + return upbound_nxcells; + +} /******************************************************************************* - void create_xgrid_1dx2d_order1 + void create_xgrid_2DX2D_order1 This routine generate exchange grids between two grids for the first order - conservative interpolation. nlon_in,nlat_in,nlon_out,nlat_out are the size of the grid cell - and lon_in,lat_in are 1-D grid bounds, lon_out,lat_out are geographic grid location of grid cell bounds. + conservative interpolation. nlon_input_cells,ninput_grid_lat,nlon_output_cells,nlat_output_cells are the size of the grid cell + and input_grid_lon,input_grid_lat, output_grid_lon,output_grid_lat are geographic grid location of grid cell bounds. *******************************************************************************/ -int create_xgrid_1dx2d_order1_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, const double *lon_in, - const double *lat_in, const double *lon_out, const double *lat_out, - const double *mask_in, int *i_in, int *j_in, int *i_out, - int *j_out, double *xgrid_area) +int create_xgrid_2dx2d_order1_acc(const int nlon_input_cells, const int nlat_input_cells, + const int nlon_output_cells, const int nlat_output_cells, + const int jlat_overlap_starts, const int jlat_overlap_ends, + const double *input_grid_lon, const double *input_grid_lat, + const double *output_grid_lon, const double *output_grid_lat, + const int upbound_nxcells, const double *mask_input_grid, + const Grid_cells_struct_config *output_grid_cells, + int *approx_nxcells_per_ij1, int *ij2_start, int *ij2_end, + Interp_per_input_tile *interp_for_itile) { - int nx1, ny1, nx2, ny2, nx1p, nx2p; - int i1, j1, i2, j2, nxgrid; - double ll_lon, ll_lat, ur_lon, ur_lat, x_in[MV], y_in[MV], x_out[MV], y_out[MV]; - double *area_in, *area_out, min_area; - double *tmpx, *tmpy; - - nx1 = *nlon_in; - ny1 = *nlat_in; - nx2 = *nlon_out; - ny2 = *nlat_out; - - nxgrid = 0; - nx1p = nx1 + 1; - nx2p = nx2 + 1; - - area_in = (double *)malloc(nx1*ny1*sizeof(double)); - area_out = (double *)malloc(nx2*ny2*sizeof(double)); - tmpx = (double *)malloc((nx1+1)*(ny1+1)*sizeof(double)); - tmpy = (double *)malloc((nx1+1)*(ny1+1)*sizeof(double)); - for(j1=0; j1<=ny1; j1++) for(i1=0; i1<=nx1; i1++) { - tmpx[j1*nx1p+i1] = lon_in[i1]; - tmpy[j1*nx1p+i1] = lat_in[j1]; - } - /* This is just a temporary fix to solve the issue that there is one point in zonal direction */ - // TODO: Finish this "temporary fix" - if(nx1 > 1) - get_grid_area(nlon_in, nlat_in, tmpx, tmpy, area_in); - else - get_grid_area_no_adjust(nlon_in, nlat_in, tmpx, tmpy, area_in); - - get_grid_area(nlon_out, nlat_out, lon_out, lat_out, area_out); - free(tmpx); - free(tmpy); - - for(j1=0; j1 MASK_THRESH ) { - - ll_lon = lon_in[i1]; ll_lat = lat_in[j1]; - ur_lon = lon_in[i1+1]; ur_lat = lat_in[j1+1]; - for(j2=0; j2=ur_lat) && (y_in[1]>=ur_lat) - && (y_in[2]>=ur_lat) && (y_in[3]>=ur_lat) ) continue; - - x_in[0] = lon_out[j2*nx2p+i2]; - x_in[1] = lon_out[j2*nx2p+i2+1]; - x_in[2] = lon_out[(j2+1)*nx2p+i2+1]; - x_in[3] = lon_out[(j2+1)*nx2p+i2]; - n_in = fix_lon(x_in, y_in, 4, (ll_lon+ur_lon)/2); - - if ( (n_out = clip ( x_in, y_in, n_in, ll_lon, ll_lat, ur_lon, ur_lat, x_out, y_out )) > 0 ) { - Xarea = poly_area (x_out, y_out, n_out ) * mask_in[j1*nx1+i1]; - min_area = min(area_in[j1*nx1+i1], area_out[j2*nx2+i2]); - if( Xarea/min_area > AREA_RATIO_THRESH ) { - xgrid_area[nxgrid] = Xarea; - i_in[nxgrid] = i1; - j_in[nxgrid] = j1; - i_out[nxgrid] = i2; - j_out[nxgrid] = j2; - ++nxgrid; - if(nxgrid > MAXXGRID) error_handler("nxgrid is greater than MAXXGRID, increase MAXXGRID"); - } - } - } - } - - free(area_in); - free(area_out); - - return nxgrid; - -}; /* create_xgrid_1dx2d_order1 */ - - -/******************************************************************************** - void create_xgrid_1dx2d_order2 - This routine generate exchange grids between two grids for the second order - conservative interpolation. nlon_in,nlat_in,nlon_out,nlat_out are the size of the grid cell - and lon_in,lat_in are 1-D grid bounds, lon_out,lat_out are geographic grid location of grid cell bounds. -********************************************************************************/ -int create_xgrid_1dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, - const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out, - const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out, - double *xgrid_area, double *xgrid_clon, double *xgrid_clat) -{ - - int nx1, ny1, nx2, ny2, nx1p, nx2p; - int i1, j1, i2, j2, nxgrid, n; - double ll_lon, ll_lat, ur_lon, ur_lat, x_in[MV], y_in[MV], x_out[MV], y_out[MV]; - double *area_in, *area_out, min_area; - double *tmpx, *tmpy; - - nx1 = *nlon_in; - ny1 = *nlat_in; - nx2 = *nlon_out; - ny2 = *nlat_out; - - nxgrid = 0; - nx1p = nx1 + 1; - nx2p = nx2 + 1; + if(upbound_nxcells<1) return 0; + + int nxcells=0; + + int input_grid_ncells = nlon_input_cells*nlat_input_cells; + int output_grid_ncells = nlon_output_cells*nlat_output_cells; + int input_grid_npts = (nlon_input_cells+1)*(nlat_input_cells+1); + int output_grid_npts = (nlon_output_cells+1)*(nlat_output_cells+1); + + int ij1_start = jlat_overlap_starts*nlon_input_cells; + int ij1_end = (jlat_overlap_ends+1)*nlon_input_cells; + + double xcell_dclon=-99.99, xcell_dclat=-99.99; + + int *parent_input_index = NULL ; parent_input_index = (int *)malloc(upbound_nxcells*sizeof(int)); + int *parent_output_index = NULL ; parent_output_index = (int *)malloc(upbound_nxcells*sizeof(int)); + int *nxcells_per_ij1 = NULL ; nxcells_per_ij1 = (int *)malloc(input_grid_ncells*sizeof(int)); + double *store_xcell_area = NULL ; store_xcell_area = (double *)malloc(upbound_nxcells*sizeof(double)); + +#pragma acc enter data create(parent_input_index[:upbound_nxcells], \ + parent_output_index[:upbound_nxcells], \ + store_xcell_area[:upbound_nxcells], \ + nxcells_per_ij1[:input_grid_ncells]) + +#pragma acc data present(output_grid_lon[:output_grid_npts], \ + output_grid_lat[:output_grid_npts], \ + input_grid_lon[:input_grid_npts], \ + input_grid_lat[:input_grid_npts], \ + output_grid_cells[:1], \ + approx_nxcells_per_ij1[:input_grid_ncells], \ + ij2_start[:input_grid_ncells], \ + ij2_end[:input_grid_ncells], \ + mask_input_grid[:input_grid_ncells], \ + nxcells_per_ij1[:input_grid_ncells], \ + parent_input_index[:upbound_nxcells], \ + parent_output_index[:upbound_nxcells], \ + store_xcell_area[:upbound_nxcells]) \ + copyin(input_grid_ncells, output_grid_ncells) \ + copy(nxcells) +#pragma acc parallel loop reduction(+:nxcells) + for(int ij1=ij1_start; ij1 MASK_THRESH) { + + double input_cell_lon_vertices[MV], input_cell_lat_vertices[MV]; + int approx_nxcells_b4_ij1=0, ixcell=0; + + get_cell_vertices_acc(ij1, nlon_input_cells, input_grid_lon, input_grid_lat, + input_cell_lon_vertices, input_cell_lat_vertices); + double input_cell_lat_min = minval_double_acc(4, input_cell_lat_vertices); + double input_cell_lat_max = maxval_double_acc(4, input_cell_lat_vertices); + int nvertices1 = fix_lon_acc(input_cell_lon_vertices, input_cell_lat_vertices, 4, M_PI); + double input_cell_lon_min = minval_double_acc(nvertices1, input_cell_lon_vertices); + double input_cell_lon_max = maxval_double_acc(nvertices1, input_cell_lon_vertices); + double input_cell_lon_cent = avgval_double_acc(nvertices1, input_cell_lon_vertices); + double input_cell_area = poly_area_acc(input_cell_lon_vertices, input_cell_lat_vertices, nvertices1); + +#pragma acc loop seq + for(int i=1; i<=ij1 ; i++) approx_nxcells_b4_ij1 += approx_nxcells_per_ij1[i-1]; + nxcells_per_ij1[ij1]=0; + +#pragma acc loop seq reduction(+:ixcell) + for(int ij2=ij2_start[ij1]; ij2<=ij2_end[ij1]; ij2++) { + + int nvertices2, xvertices=1; + double dlon_cent, output_cell_lon_min, output_cell_lon_max, output_cell_area; + double output_cell_lon_vertices[MAX_V], output_cell_lat_vertices[MAX_V]; + double xcell_lon_vertices[MV], xcell_lat_vertices[MV]; + + double rotate=0.0; + + if(output_grid_cells->lat_min[ij2] >= input_cell_lat_max) continue; + if(output_grid_cells->lat_max[ij2] <= input_cell_lat_min) continue; + + /* adjust according to input_grid_lon_cent*/ + output_cell_lon_min = output_grid_cells->lon_min[ij2]; + output_cell_lon_max = output_grid_cells->lon_max[ij2]; + nvertices2 = output_grid_cells->nvertices[ij2]; + output_cell_area = output_grid_cells->area[ij2]; + + dlon_cent = output_grid_cells->lon_cent[ij2] - input_cell_lon_cent; + + if(dlon_cent < -M_PI) rotate = TPI; + if(dlon_cent > M_PI) rotate = -TPI; + + output_cell_lon_min += rotate; + output_cell_lon_max += rotate; + for (int l=0; llon_vertices[ij2][l] + rotate; + output_cell_lat_vertices[l] = output_grid_cells->lat_vertices[ij2][l]; + } - area_in = (double *)malloc(nx1*ny1*sizeof(double)); - area_out = (double *)malloc(nx2*ny2*sizeof(double)); - tmpx = (double *)malloc((nx1+1)*(ny1+1)*sizeof(double)); - tmpy = (double *)malloc((nx1+1)*(ny1+1)*sizeof(double)); - for(j1=0; j1<=ny1; j1++) for(i1=0; i1<=nx1; i1++) { - tmpx[j1*nx1p+i1] = lon_in[i1]; - tmpy[j1*nx1p+i1] = lat_in[j1]; - } - get_grid_area(nlon_in, nlat_in, tmpx, tmpy, area_in); - get_grid_area(nlon_out, nlat_out, lon_out, lat_out, area_out); - free(tmpx); - free(tmpy); - - for(j1=0; j1 MASK_THRESH ) { - - ll_lon = lon_in[i1]; ll_lat = lat_in[j1]; - ur_lon = lon_in[i1+1]; ur_lat = lat_in[j1+1]; - for(j2=0; j2=ur_lat) && (y_in[1]>=ur_lat) - && (y_in[2]>=ur_lat) && (y_in[3]>=ur_lat) ) continue; - - x_in[0] = lon_out[j2*nx2p+i2]; - x_in[1] = lon_out[j2*nx2p+i2+1]; - x_in[2] = lon_out[(j2+1)*nx2p+i2+1]; - x_in[3] = lon_out[(j2+1)*nx2p+i2]; - n_in = fix_lon(x_in, y_in, 4, (ll_lon+ur_lon)/2); - lon_in_avg = avgval_double(n_in, x_in); - - if ( (n_out = clip ( x_in, y_in, n_in, ll_lon, ll_lat, ur_lon, ur_lat, x_out, y_out )) > 0 ) { - xarea = poly_area (x_out, y_out, n_out ) * mask_in[j1*nx1+i1]; - min_area = min(area_in[j1*nx1+i1], area_out[j2*nx2+i2]); - if(xarea/min_area > AREA_RATIO_THRESH ) { - xgrid_area[nxgrid] = xarea; - xgrid_clon[nxgrid] = poly_ctrlon(x_out, y_out, n_out, lon_in_avg); - xgrid_clat[nxgrid] = poly_ctrlat (x_out, y_out, n_out ); - i_in[nxgrid] = i1; - j_in[nxgrid] = j1; - i_out[nxgrid] = i2; - j_out[nxgrid] = j2; - ++nxgrid; - if(nxgrid > MAXXGRID) error_handler("nxgrid is greater than MAXXGRID, increase MAXXGRID"); - } - } + //output_cell_lon should in the same range as input_cell_lon after lon_fix, + // so no need to consider cyclic condition + if(output_cell_lon_min >= input_cell_lon_max ) continue; + if(output_cell_lon_max <= input_cell_lon_min ) continue; + + if ( (xvertices = clip_2dx2d_acc( input_cell_lon_vertices, input_cell_lat_vertices, nvertices1, + output_cell_lon_vertices, output_cell_lat_vertices, nvertices2, + xcell_lon_vertices, xcell_lat_vertices)) > 0 ){ + double xcell_area = poly_area_acc(xcell_lon_vertices, xcell_lat_vertices, xvertices); + if( xcell_area/min(input_cell_area, output_cell_area) > AREA_RATIO_THRESH ) { + store_xcell_area[approx_nxcells_b4_ij1+ixcell] = xcell_area; + parent_input_index[approx_nxcells_b4_ij1+ixcell] = ij1; + parent_output_index[approx_nxcells_b4_ij1+ixcell] = ij2; + ixcell++; } + } } - free(area_in); - free(area_out); - - return nxgrid; - -}; /* create_xgrid_1dx2d_order2 */ - -/******************************************************************************* - void create_xgrid_2dx1d_order1 - This routine generate exchange grids between two grids for the first order - conservative interpolation. nlon_in,nlat_in,nlon_out,nlat_out are the size of the grid cell - and lon_out,lat_out are 1-D grid bounds, lon_in,lat_in are geographic grid location of grid cell bounds. - mask is on grid lon_in/lat_in. -*******************************************************************************/ -int create_xgrid_2dx1d_order1_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, const double *lon_in, - const double *lat_in, const double *lon_out, const double *lat_out, - const double *mask_in, int *i_in, int *j_in, int *i_out, - int *j_out, double *xgrid_area) -{ - - int nx1, ny1, nx2, ny2, nx1p, nx2p; - int i1, j1, i2, j2, nxgrid; - double ll_lon, ll_lat, ur_lon, ur_lat, x_in[MV], y_in[MV], x_out[MV], y_out[MV]; - double *area_in, *area_out, min_area; - double *tmpx, *tmpy; - - nx1 = *nlon_in; - ny1 = *nlat_in; - nx2 = *nlon_out; - ny2 = *nlat_out; - - nxgrid = 0; - nx1p = nx1 + 1; - nx2p = nx2 + 1; - area_in = (double *)malloc(nx1*ny1*sizeof(double)); - area_out = (double *)malloc(nx2*ny2*sizeof(double)); - tmpx = (double *)malloc((nx2+1)*(ny2+1)*sizeof(double)); - tmpy = (double *)malloc((nx2+1)*(ny2+1)*sizeof(double)); - for(j2=0; j2<=ny2; j2++) for(i2=0; i2<=nx2; i2++) { - tmpx[j2*nx2p+i2] = lon_out[i2]; - tmpy[j2*nx2p+i2] = lat_out[j2]; - } - get_grid_area(nlon_in, nlat_in, lon_in, lat_in, area_in); - get_grid_area(nlon_out, nlat_out, tmpx, tmpy, area_out); - - free(tmpx); - free(tmpy); - - for(j2=0; j2 MASK_THRESH ) { - int n_in, n_out; - double Xarea; - - y_in[0] = lat_in[j1*nx1p+i1]; - y_in[1] = lat_in[j1*nx1p+i1+1]; - y_in[2] = lat_in[(j1+1)*nx1p+i1+1]; - y_in[3] = lat_in[(j1+1)*nx1p+i1]; - if ( (y_in[0]<=ll_lat) && (y_in[1]<=ll_lat) - && (y_in[2]<=ll_lat) && (y_in[3]<=ll_lat) ) continue; - if ( (y_in[0]>=ur_lat) && (y_in[1]>=ur_lat) - && (y_in[2]>=ur_lat) && (y_in[3]>=ur_lat) ) continue; - - x_in[0] = lon_in[j1*nx1p+i1]; - x_in[1] = lon_in[j1*nx1p+i1+1]; - x_in[2] = lon_in[(j1+1)*nx1p+i1+1]; - x_in[3] = lon_in[(j1+1)*nx1p+i1]; - - n_in = fix_lon(x_in, y_in, 4, (ll_lon+ur_lon)/2); - - if ( (n_out = clip ( x_in, y_in, n_in, ll_lon, ll_lat, ur_lon, ur_lat, x_out, y_out )) > 0 ) { - Xarea = poly_area ( x_out, y_out, n_out ) * mask_in[j1*nx1+i1]; - min_area = min(area_in[j1*nx1+i1], area_out[j2*nx2+i2]); - if( Xarea/min_area > AREA_RATIO_THRESH ) { - xgrid_area[nxgrid] = Xarea; - i_in[nxgrid] = i1; - j_in[nxgrid] = j1; - i_out[nxgrid] = i2; - j_out[nxgrid] = j2; - ++nxgrid; - if(nxgrid > MAXXGRID) error_handler("nxgrid is greater than MAXXGRID, increase MAXXGRID"); - } - } - } + nxcells+=ixcell; + nxcells_per_ij1[ij1]=ixcell; } + } - free(area_in); - free(area_out); - - return nxgrid; - -}; /* create_xgrid_2dx1d_order1 */ - - -/******************************************************************************** - void create_xgrid_2dx1d_order2 - This routine generate exchange grids between two grids for the second order - conservative interpolation. nlon_in,nlat_in,nlon_out,nlat_out are the size of the grid cell - and lon_out,lat_out are 1-D grid bounds, lon_in,lat_in are geographic grid location of grid cell bounds. - mask is on grid lon_in/lat_in. -********************************************************************************/ -int create_xgrid_2dx1d_order2_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, - const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out, - const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out, - double *xgrid_area, double *xgrid_clon, double *xgrid_clat) -{ - - int nx1, ny1, nx2, ny2, nx1p, nx2p; - int i1, j1, i2, j2, nxgrid, n; - double ll_lon, ll_lat, ur_lon, ur_lat, x_in[MV], y_in[MV], x_out[MV], y_out[MV]; - double *tmpx, *tmpy; - double *area_in, *area_out, min_area; - double lon_in_avg; - - nx1 = *nlon_in; - ny1 = *nlat_in; - nx2 = *nlon_out; - ny2 = *nlat_out; + copy_data_to_interp_on_device_acc(nxcells, input_grid_ncells, upbound_nxcells, nxcells_per_ij1, + &xcell_dclon, &xcell_dclat, + approx_nxcells_per_ij1, parent_input_index, parent_output_index, + store_xcell_area, interp_for_itile); - nxgrid = 0; - nx1p = nx1 + 1; - nx2p = nx2 + 1; +#pragma acc exit data delete( parent_input_index[:upbound_nxcells], \ + parent_output_index[:upbound_nxcells], \ + store_xcell_area[:upbound_nxcells], \ + nxcells_per_ij1[:input_grid_ncells]) - area_in = (double *)malloc(nx1*ny1*sizeof(double)); - area_out = (double *)malloc(nx2*ny2*sizeof(double)); - tmpx = (double *)malloc((nx2+1)*(ny2+1)*sizeof(double)); - tmpy = (double *)malloc((nx2+1)*(ny2+1)*sizeof(double)); - for(j2=0; j2<=ny2; j2++) for(i2=0; i2<=nx2; i2++) { - tmpx[j2*nx2p+i2] = lon_out[i2]; - tmpy[j2*nx2p+i2] = lat_out[j2]; - } - get_grid_area(nlon_in, nlat_in, lon_in, lat_in, area_in); - get_grid_area(nlon_out, nlat_out, tmpx, tmpy, area_out); - - free(tmpx); - free(tmpy); - - for(j2=0; j2 MASK_THRESH ) { - int n_in, n_out; - double xarea; - - y_in[0] = lat_in[j1*nx1p+i1]; - y_in[1] = lat_in[j1*nx1p+i1+1]; - y_in[2] = lat_in[(j1+1)*nx1p+i1+1]; - y_in[3] = lat_in[(j1+1)*nx1p+i1]; - if ( (y_in[0]<=ll_lat) && (y_in[1]<=ll_lat) - && (y_in[2]<=ll_lat) && (y_in[3]<=ll_lat) ) continue; - if ( (y_in[0]>=ur_lat) && (y_in[1]>=ur_lat) - && (y_in[2]>=ur_lat) && (y_in[3]>=ur_lat) ) continue; - - x_in[0] = lon_in[j1*nx1p+i1]; - x_in[1] = lon_in[j1*nx1p+i1+1]; - x_in[2] = lon_in[(j1+1)*nx1p+i1+1]; - x_in[3] = lon_in[(j1+1)*nx1p+i1]; - - n_in = fix_lon(x_in, y_in, 4, (ll_lon+ur_lon)/2); - lon_in_avg = avgval_double(n_in, x_in); - - if ( (n_out = clip ( x_in, y_in, n_in, ll_lon, ll_lat, ur_lon, ur_lat, x_out, y_out )) > 0 ) { - xarea = poly_area (x_out, y_out, n_out ) * mask_in[j1*nx1+i1]; - min_area = min(area_in[j1*nx1+i1], area_out[j2*nx2+i2]); - if(xarea/min_area > AREA_RATIO_THRESH ) { - xgrid_area[nxgrid] = xarea; - xgrid_clon[nxgrid] = poly_ctrlon(x_out, y_out, n_out, lon_in_avg); - xgrid_clat[nxgrid] = poly_ctrlat (x_out, y_out, n_out ); - i_in[nxgrid] = i1; - j_in[nxgrid] = j1; - i_out[nxgrid] = i2; - j_out[nxgrid] = j2; - ++nxgrid; - if(nxgrid > MAXXGRID) error_handler("nxgrid is greater than MAXXGRID, increase MAXXGRID"); - } - } - } - } + free(parent_input_index) ; parent_input_index = NULL; + free(parent_output_index); parent_output_index = NULL; + free(nxcells_per_ij1) ; nxcells_per_ij1 = NULL; + free(store_xcell_area) ; store_xcell_area = NULL; - free(area_in); - free(area_out); + return nxcells; - return nxgrid; +};/* get_xgrid_2Dx2D_order1 */ -}; /* create_xgrid_2dx1d_order2 */ /******************************************************************************* - void create_xgrid_2DX2D_order1 - This routine generate exchange grids between two grids for the first order - conservative interpolation. nlon_in,nlat_in,nlon_out,nlat_out are the size of the grid cell - and lon_in,lat_in, lon_out,lat_out are geographic grid location of grid cell bounds. - mask is on grid lon_in/lat_in. + void create_xgrid_2DX2D_order2 + This routine generate exchange grids between two grids for the second order *******************************************************************************/ -int create_xgrid_2dx2d_order1_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, - const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out, - const double *mask_in, int *i_in, int *j_in, int *i_out, - int *j_out, double *xgrid_area) +int create_xgrid_2dx2d_order2_acc(const int nlon_input_cells, const int nlat_input_cells, + const int nlon_output_cells, const int nlat_output_cells, + const int jlat_overlap_starts, const int jlat_overlap_ends, + const double *input_grid_lon, const double *input_grid_lat, + const double *output_grid_lon, const double *output_grid_lat, + const int upbound_nxcells, const double *mask_input_grid, + const Grid_cells_struct_config *output_grid_cells, + int *approx_nxcells_per_ij1, int *ij2_start, int *ij2_end, + Interp_per_input_tile *interp_for_itile, double *readin_input_area) { -#define MAX_V 8 - int nx1, nx2, ny1, ny2, nx1p, nx2p, nxgrid; - double *area_in, *area_out; - int nblocks =1; - int *istart2=NULL, *iend2=NULL; - int npts_left, nblks_left, pos, m, npts_my, ij; - double *lon_out_min_list,*lon_out_max_list,*lon_out_avg,*lat_out_min_list,*lat_out_max_list; - double *lon_out_list, *lat_out_list; - int *pnxgrid=NULL, *pstart; - int *pi_in=NULL, *pj_in=NULL, *pi_out=NULL, *pj_out=NULL; - double *pxgrid_area=NULL; - int *n2_list; - int nthreads, nxgrid_block_max; - - nx1 = *nlon_in; - ny1 = *nlat_in; - nx2 = *nlon_out; - ny2 = *nlat_out; - nx1p = nx1 + 1; - nx2p = nx2 + 1; - - area_in = (double *)malloc(nx1*ny1*sizeof(double)); - area_out = (double *)malloc(nx2*ny2*sizeof(double)); - get_grid_area(nlon_in, nlat_in, lon_in, lat_in, area_in); - get_grid_area(nlon_out, nlat_out, lon_out, lat_out, area_out); - - nthreads = 1; -#if defined(_OPENMP) -#pragma omp parallel - nthreads = omp_get_num_threads(); -#endif - - nblocks = nthreads; - - istart2 = (int *)malloc(nblocks*sizeof(int)); - iend2 = (int *)malloc(nblocks*sizeof(int)); - - pstart = (int *)malloc(nblocks*sizeof(int)); - pnxgrid = (int *)malloc(nblocks*sizeof(int)); - - nxgrid_block_max = MAXXGRID/nblocks; - - for(m=0; m MAX_V) error_handler("create_xgrid.c: n2_in is greater than MAX_V"); - lon_out_min_list[n] = minval_double(n2_in, x2_in); - lon_out_max_list[n] = maxval_double(n2_in, x2_in); - lon_out_avg[n] = avgval_double(n2_in, x2_in); - n2_list[n] = n2_in; - for(l=0; l MASK_THRESH ) { - int n0, n1, n2, n3, l,n1_in; - double lat_in_min,lat_in_max,lon_in_min,lon_in_max,lon_in_avg; - double x1_in[MV], y1_in[MV], x_out[MV], y_out[MV]; - - n0 = j1*nx1p+i1; n1 = j1*nx1p+i1+1; - n2 = (j1+1)*nx1p+i1+1; n3 = (j1+1)*nx1p+i1; - x1_in[0] = lon_in[n0]; y1_in[0] = lat_in[n0]; - x1_in[1] = lon_in[n1]; y1_in[1] = lat_in[n1]; - x1_in[2] = lon_in[n2]; y1_in[2] = lat_in[n2]; - x1_in[3] = lon_in[n3]; y1_in[3] = lat_in[n3]; - lat_in_min = minval_double(4, y1_in); - lat_in_max = maxval_double(4, y1_in); - n1_in = fix_lon(x1_in, y1_in, 4, M_PI); - lon_in_min = minval_double(n1_in, x1_in); - lon_in_max = maxval_double(n1_in, x1_in); - lon_in_avg = avgval_double(n1_in, x1_in); - for(ij=istart2[m]; ij<=iend2[m]; ij++) { - int n_in, n_out, i2, j2, n2_in; - double xarea, dx, lon_out_min, lon_out_max; - double x2_in[MAX_V], y2_in[MAX_V]; - - i2 = ij%nx2; - j2 = ij/nx2; - - if(lat_out_min_list[ij] >= lat_in_max || lat_out_max_list[ij] <= lat_in_min ) continue; - /* adjust x2_in according to lon_in_avg*/ - n2_in = n2_list[ij]; - for(l=0; l M_PI) { - lon_out_min -= TPI; - lon_out_max -= TPI; - for (l=0; l= lon_in_max || lon_out_max <= lon_in_min ) continue; - if ( (n_out = clip_2dx2d( x1_in, y1_in, n1_in, x2_in, y2_in, n2_in, x_out, y_out )) > 0) { - double min_area; - int nn; - xarea = poly_area (x_out, y_out, n_out ) * mask_in[j1*nx1+i1]; - min_area = min(area_in[j1*nx1+i1], area_out[j2*nx2+i2]); - if( xarea/min_area > AREA_RATIO_THRESH ) { - pnxgrid[m]++; - if(pnxgrid[m]>= MAXXGRID/nthreads) - error_handler("The xgrid size is too large for resources.\n" - " nxgrid is greater than MAXXGRID/nthreads; increase MAXXGRID,\n" - " decrease nthreads, or increase number of MPI ranks."); - nn = pstart[m] + pnxgrid[m]-1; - - pxgrid_area[nn] = xarea; - pi_in[nn] = i1; - pj_in[nn] = j1; - pi_out[nn] = i2; - pj_out[nn] = j2; - } - - } + if(upbound_nxcells<1) return 0; + + int nxcells=0; + + int input_grid_ncells = nlon_input_cells*nlat_input_cells; + int output_grid_ncells = nlon_output_cells*nlat_output_cells; + int input_grid_npts = (nlon_input_cells+1)*(nlat_input_cells+1); + int output_grid_npts = (nlon_output_cells+1)*(nlat_output_cells+1); + + int ij1_start = jlat_overlap_starts*nlon_input_cells; + int ij1_end = (jlat_overlap_ends+1)*nlon_input_cells; + + int *parent_input_index=NULL ; parent_input_index = (int *)malloc(upbound_nxcells*sizeof(int)); + int *parent_output_index=NULL ; parent_output_index = (int *)malloc(upbound_nxcells*sizeof(int)); + double *store_xcell_area=NULL ; store_xcell_area = (double *)malloc(upbound_nxcells*sizeof(double)); + double *store_xcell_dclon=NULL ; store_xcell_dclon = (double *)malloc(upbound_nxcells*sizeof(double)); + double *store_xcell_dclat=NULL ; store_xcell_dclat = (double *)malloc(upbound_nxcells*sizeof(double)); + + int *nxcells_per_ij1=NULL ; nxcells_per_ij1 = (int *)malloc(input_grid_ncells*sizeof(int)); + double *summed_input_area=NULL; summed_input_area = (double *)malloc(input_grid_ncells*sizeof(double)); + double *summed_input_clat=NULL; summed_input_clat = (double *)malloc(input_grid_ncells*sizeof(double)); + double *summed_input_clon=NULL; summed_input_clon = (double *)malloc(input_grid_ncells*sizeof(double)); + +#pragma acc enter data create(parent_input_index[:upbound_nxcells], \ + parent_output_index[:upbound_nxcells], \ + store_xcell_area[:upbound_nxcells], \ + nxcells_per_ij1[:input_grid_ncells], \ + store_xcell_dclon[:upbound_nxcells], \ + store_xcell_dclat[:upbound_nxcells], \ + summed_input_area[:input_grid_ncells], \ + summed_input_clon[:input_grid_ncells], \ + summed_input_clat[:input_grid_ncells]) + +#pragma acc data present(output_grid_lon[:output_grid_npts], \ + output_grid_lat[:output_grid_npts], \ + input_grid_lon[:input_grid_npts], \ + input_grid_lat[:input_grid_npts], \ + output_grid_cells[:1], \ + approx_nxcells_per_ij1[:input_grid_ncells], \ + ij2_start[:input_grid_ncells], \ + ij2_end[:input_grid_ncells], \ + mask_input_grid[:input_grid_ncells], \ + nxcells_per_ij1[:input_grid_ncells], \ + parent_input_index[:upbound_nxcells], \ + parent_output_index[:upbound_nxcells], \ + store_xcell_area[:upbound_nxcells], \ + store_xcell_dclon[:upbound_nxcells], \ + store_xcell_dclat[:upbound_nxcells]) \ + copyin(input_grid_ncells, output_grid_ncells) copy(nxcells) +#pragma acc parallel loop reduction(+:nxcells) + for(int ij1=ij1_start; ij1 MASK_THRESH) { + + double input_cell_lon_vertices[MV], input_cell_lat_vertices[MV]; + double summed_input_area_ij1=0.; + double summed_input_clon_ij1=0.; + double summed_input_clat_ij1=0.; + int approx_nxcells_b4_ij1=0, ixcell=0; + + get_cell_vertices_acc(ij1, nlon_input_cells, input_grid_lon, input_grid_lat, + input_cell_lon_vertices, input_cell_lat_vertices); + double input_cell_lat_min = minval_double_acc(4, input_cell_lat_vertices); + double input_cell_lat_max = maxval_double_acc(4, input_cell_lat_vertices); + int nvertices1 = fix_lon_acc(input_cell_lon_vertices, input_cell_lat_vertices, 4, M_PI); + double input_cell_lon_min = minval_double_acc(nvertices1, input_cell_lon_vertices); + double input_cell_lon_max = maxval_double_acc(nvertices1, input_cell_lon_vertices); + double input_cell_lon_cent = avgval_double_acc(nvertices1, input_cell_lon_vertices); + double input_cell_area = poly_area_acc(input_cell_lon_vertices, input_cell_lat_vertices, nvertices1); + +#pragma acc loop seq + for(int i=1; i<=ij1 ; i++) approx_nxcells_b4_ij1 += approx_nxcells_per_ij1[i-1]; + +#pragma acc loop seq reduction(+:ixcell) \ + reduction(+:summed_input_area_ij1) \ + reduction(+:summed_input_clon_ij1) \ + reduction(+:summed_input_clat_ij1) + for(int ij2=ij2_start[ij1]; ij2<=ij2_end[ij1]; ij2++) { + + int nvertices2, xvertices=1; + double dlon_cent, output_cell_lon_min, output_cell_lon_max; + double output_cell_lon_vertices[MAX_V], output_cell_lat_vertices[MAX_V]; + double output_cell_area; + double xcell_lon_vertices[MV], xcell_lat_vertices[MV]; + + double rotate=0.0; + + if(output_grid_cells->lat_min[ij2] >= input_cell_lat_max) continue; + if(output_grid_cells->lat_max[ij2] <= input_cell_lat_min) continue; + + dlon_cent = output_grid_cells->lon_cent[ij2] - input_cell_lon_cent; + if(dlon_cent < -M_PI) rotate = TPI; + if(dlon_cent > M_PI) rotate = -TPI; + + /* adjust according to input_grid_lon_cent*/ + output_cell_lon_min = output_grid_cells->lon_min[ij2] + rotate; + output_cell_lon_max = output_grid_cells->lon_max[ij2] + rotate; + nvertices2 = output_grid_cells->nvertices[ij2]; + output_cell_area = output_grid_cells->area[ij2]; + + for (int l=0; llon_vertices[ij2][l] + rotate; + output_cell_lat_vertices[l] = output_grid_cells->lat_vertices[ij2][l]; + } + //output_cell_lon should in the same range as input_cell_lon after lon_fix, + // so no need to consider cyclic condition + if(output_cell_lon_min >= input_cell_lon_max ) continue; + if(output_cell_lon_max <= input_cell_lon_min ) continue; + + if ( (xvertices = clip_2dx2d_acc( input_cell_lon_vertices, input_cell_lat_vertices, nvertices1, + output_cell_lon_vertices, output_cell_lat_vertices, nvertices2, + xcell_lon_vertices, xcell_lat_vertices)) > 0 ){ + double xcell_area = poly_area_acc(xcell_lon_vertices, xcell_lat_vertices, xvertices); + if( xcell_area/min(input_cell_area, output_cell_area) > AREA_RATIO_THRESH ) { + double xcell_clon, xcell_clat; + store_xcell_area[approx_nxcells_b4_ij1+ixcell] = xcell_area; + parent_input_index[approx_nxcells_b4_ij1+ixcell] = ij1; + parent_output_index[approx_nxcells_b4_ij1+ixcell] = ij2; + poly_ctrlon_acc(xcell_lon_vertices, xcell_lat_vertices, xvertices, input_cell_lon_cent, &xcell_clon); + poly_ctrlat_acc(xcell_lon_vertices, xcell_lat_vertices, xvertices, &xcell_clat); + store_xcell_dclon[approx_nxcells_b4_ij1+ixcell] = xcell_clon/xcell_area; + store_xcell_dclat[approx_nxcells_b4_ij1+ixcell] = xcell_clat/xcell_area; + summed_input_area_ij1 += xcell_area; + summed_input_clon_ij1 += xcell_clon; + summed_input_clat_ij1 += xcell_clat; + ixcell++; } } - } - - /*copy data if nblocks > 1 */ - if(nblocks == 1) { - nxgrid = pnxgrid[0]; - pi_in = NULL; - pj_in = NULL; - pi_out = NULL; - pj_out = NULL; - pxgrid_area = NULL; - } - else { - int nn, i; - nxgrid = 0; - for(m=0; m MAX_V) error_handler("create_xgrid.c: n2_in is greater than MAX_V"); - lon_out_min_list[n] = minval_double(n2_in, x2_in); - lon_out_max_list[n] = maxval_double(n2_in, x2_in); - lon_out_avg[n] = avgval_double(n2_in, x2_in); - n2_list[n] = n2_in; - for(l=0; l MASK_THRESH ) { - int n0, n1, n2, n3, l,n1_in; - double lat_in_min,lat_in_max,lon_in_min,lon_in_max,lon_in_avg; - double x1_in[MV], y1_in[MV], x_out[MV], y_out[MV]; - - n0 = j1*nx1p+i1; n1 = j1*nx1p+i1+1; - n2 = (j1+1)*nx1p+i1+1; n3 = (j1+1)*nx1p+i1; - x1_in[0] = lon_in[n0]; y1_in[0] = lat_in[n0]; - x1_in[1] = lon_in[n1]; y1_in[1] = lat_in[n1]; - x1_in[2] = lon_in[n2]; y1_in[2] = lat_in[n2]; - x1_in[3] = lon_in[n3]; y1_in[3] = lat_in[n3]; - lat_in_min = minval_double(4, y1_in); - lat_in_max = maxval_double(4, y1_in); - n1_in = fix_lon(x1_in, y1_in, 4, M_PI); - lon_in_min = minval_double(n1_in, x1_in); - lon_in_max = maxval_double(n1_in, x1_in); - lon_in_avg = avgval_double(n1_in, x1_in); - for(ij=istart2[m]; ij<=iend2[m]; ij++) { - int n_in, n_out, i2, j2, n2_in; - double xarea, dx, lon_out_min, lon_out_max; - double x2_in[MAX_V], y2_in[MAX_V]; - - i2 = ij%nx2; - j2 = ij/nx2; - - if(lat_out_min_list[ij] >= lat_in_max || lat_out_max_list[ij] <= lat_in_min ) continue; - /* adjust x2_in according to lon_in_avg*/ - n2_in = n2_list[ij]; - for(l=0; l M_PI) { - lon_out_min -= TPI; - lon_out_max -= TPI; - for (l=0; l= lon_in_max || lon_out_max <= lon_in_min ) continue; - if ( (n_out = clip_2dx2d( x1_in, y1_in, n1_in, x2_in, y2_in, n2_in, x_out, y_out )) > 0) { - double min_area; - int nn; - xarea = poly_area (x_out, y_out, n_out ) * mask_in[j1*nx1+i1]; - min_area = min(area_in[j1*nx1+i1], area_out[j2*nx2+i2]); - if( xarea/min_area > AREA_RATIO_THRESH ) { - pnxgrid[m]++; - if(pnxgrid[m]>= MAXXGRID/nthreads) - error_handler("nxgrid is greater than MAXXGRID/nthreads, increase MAXXGRID, decrease nthreads, or increase number of MPI ranks"); - nn = pstart[m] + pnxgrid[m]-1; - pxgrid_area[nn] = xarea; - pxgrid_clon[nn] = poly_ctrlon(x_out, y_out, n_out, lon_in_avg); - pxgrid_clat[nn] = poly_ctrlat (x_out, y_out, n_out ); - pi_in[nn] = i1; - pj_in[nn] = j1; - pi_out[nn] = i2; - pj_out[nn] = j2; - } - } - } - } - } - - /*copy data if nblocks > 1 */ - if(nblocks == 1) { - nxgrid = pnxgrid[0]; - pi_in = NULL; - pj_in = NULL; - pi_out = NULL; - pj_out = NULL; - pxgrid_area = NULL; - pxgrid_clon = NULL; - pxgrid_clat = NULL; - } - else { - int nn, i; - nxgrid = 0; - for(m=0; mdcentroid_lat[:nxcells], \ + interp_for_itile->dcentroid_lat[:nxcells], \ + input_grid_lon[:input_grid_ncells], \ + input_grid_lat[:input_grid_ncells], \ + summed_input_area[:input_grid_ncells], \ + summed_input_clon[:input_grid_ncells], \ + summed_input_clat[:input_grid_ncells], \ + interp_for_itile->input_parent_cell_index[:nxcells]) \ + copyin(readin_input_area[:input_grid_ncells]) + for(int ix=0 ; ixinput_parent_cell_index[ix]; + double input_area = summed_input_area[ij1]; + double input_clon = summed_input_clon[ij1]; + double input_clat = summed_input_clat[ij1]; + double readin_area = readin_input_area[ij1]; + if(fabs(input_area - readin_area)/readin_area > AREA_RATIO) { + double x[4], y[4], input_cell_lon_cent; + get_cell_vertices_acc(ij1, nlon_input_cells, input_grid_lon, input_grid_lat, x, y); + int n = fix_lon_acc(x, y, 4, M_PI); + input_cell_lon_cent = avgval_double_acc(n, x); + poly_ctrlon_acc(x, y, n, input_cell_lon_cent, &input_clon); + poly_ctrlat_acc(x, y, n, &input_clat); + input_area = readin_area; } + interp_for_itile->dcentroid_lon[ix] -= input_clon/input_area; + interp_for_itile->dcentroid_lat[ix] -= input_clat/input_area; } - free(pi_in); - free(pj_in); - free(pi_out); - free(pj_out); - free(pxgrid_area); - free(pxgrid_clon); - free(pxgrid_clat); - } - - free(area_in); - free(area_out); - free(lon_out_min_list); - free(lon_out_max_list); - free(lat_out_min_list); - free(lat_out_max_list); - free(lon_out_avg); - free(n2_list); - free(lon_out_list); - free(lat_out_list); - return nxgrid; +#pragma acc exit data delete( parent_input_index[:upbound_nxcells], \ + parent_output_index[:upbound_nxcells], \ + store_xcell_area[:upbound_nxcells], \ + nxcells_per_ij1[:input_grid_ncells], \ + store_xcell_dclon[:upbound_nxcells], \ + store_xcell_dclat[:upbound_nxcells], \ + summed_input_area[:input_grid_ncells], \ + summed_input_clon[:input_grid_ncells], \ + summed_input_clat[:input_grid_ncells]) + + free(parent_input_index) ; parent_input_index = NULL; + free(parent_output_index) ; parent_output_index = NULL; + free(store_xcell_area) ; store_xcell_area = NULL; + free(nxcells_per_ij1) ; nxcells_per_ij1 = NULL; + free(store_xcell_dclon) ; store_xcell_dclon = NULL; + free(store_xcell_dclat) ; store_xcell_dclat = NULL; + free(summed_input_area); summed_input_area = NULL; + free(summed_input_clon); summed_input_clon = NULL; + free(summed_input_clat); summed_input_clat = NULL; + + return nxcells; };/* get_xgrid_2Dx2D_order2 */ - -int create_xgrid_great_circle_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, - const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out, - const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out, +int create_xgrid_great_circle_acc(const int *nlon_input_cells, const int *nlat_input_cells, + const int *nlon_output_cells, const int *nlat_output_cells, + const double *input_grid_lon, const double *input_grid_lat, + const double *output_grid_lon, const double *output_grid_lat, + const double *mask_input_grid, + int *i_in, int *j_in, int *i_out, int *j_out, double *xgrid_area, double *xgrid_clon, double *xgrid_clat) { @@ -1056,10 +507,10 @@ int create_xgrid_great_circle_acc(const int *nlon_in, const int *nlat_in, const double xctrlon, xctrlat; double *area1, *area2, min_area; - nx1 = *nlon_in; - ny1 = *nlat_in; - nx2 = *nlon_out; - ny2 = *nlat_out; + nx1 = *nlon_input_cells; + ny1 = *nlat_input_cells; + nx2 = *nlon_output_cells; + ny2 = *nlat_output_cells; nxgrid = 0; nx1p = nx1 + 1; nx2p = nx2 + 1; @@ -1074,57 +525,58 @@ int create_xgrid_great_circle_acc(const int *nlon_in, const int *nlat_in, const y2 = (double *)malloc(nx2p*ny2p*sizeof(double)); z2 = (double *)malloc(nx2p*ny2p*sizeof(double)); - latlon2xyz(nx1p*ny1p, lon_in, lat_in, x1, y1, z1); - latlon2xyz(nx2p*ny2p, lon_out, lat_out, x2, y2, z2); + latlon2xyz_acc(nx1p*ny1p, input_grid_lon, input_grid_lat, x1, y1, z1); + latlon2xyz_acc(nx2p*ny2p, output_grid_lon, output_grid_lat, x2, y2, z2); area1 = (double *)malloc(nx1*ny1*sizeof(double)); area2 = (double *)malloc(nx2*ny2*sizeof(double)); - get_grid_great_circle_area(nlon_in, nlat_in, lon_in, lat_in, area1); - get_grid_great_circle_area(nlon_out, nlat_out, lon_out, lat_out, area2); + get_grid_great_circle_area_acc(nlon_input_cells, nlat_input_cells, input_grid_lon, input_grid_lat, area1); + get_grid_great_circle_area_acc(nlon_output_cells, nlat_output_cells, output_grid_lon, output_grid_lat, area2); n1_in = 4; n2_in = 4; - for(j1=0; j1 MASK_THRESH ) { - /* clockwise */ - n0 = j1*nx1p+i1; n1 = (j1+1)*nx1p+i1; - n2 = (j1+1)*nx1p+i1+1; n3 = j1*nx1p+i1+1; - x1_in[0] = x1[n0]; y1_in[0] = y1[n0]; z1_in[0] = z1[n0]; - x1_in[1] = x1[n1]; y1_in[1] = y1[n1]; z1_in[1] = z1[n1]; - x1_in[2] = x1[n2]; y1_in[2] = y1[n2]; z1_in[2] = z1[n2]; - x1_in[3] = x1[n3]; y1_in[3] = y1[n3]; z1_in[3] = z1[n3]; - - for(j2=0; j2 0) { - xarea = great_circle_area ( n_out, x_out, y_out, z_out ) * mask_in[j1*nx1+i1]; - min_area = min(area1[j1*nx1+i1], area2[j2*nx2+i2]); - if( xarea/min_area > AREA_RATIO_THRESH ) { + for(int j1=0; j1 MASK_THRESH ) { + /* clockwise */ + n0 = j1*nx1p+i1; n1 = (j1+1)*nx1p+i1; + n2 = (j1+1)*nx1p+i1+1; n3 = j1*nx1p+i1+1; + x1_in[0] = x1[n0]; y1_in[0] = y1[n0]; z1_in[0] = z1[n0]; + x1_in[1] = x1[n1]; y1_in[1] = y1[n1]; z1_in[1] = z1[n1]; + x1_in[2] = x1[n2]; y1_in[2] = y1[n2]; z1_in[2] = z1[n2]; + x1_in[3] = x1[n3]; y1_in[3] = y1[n3]; z1_in[3] = z1[n3]; + + for(j2=0; j2 0) { + xarea = great_circle_area_acc( n_out, x_out, y_out, z_out ) ; + min_area = min(area1[j1*nx1+i1], area2[j2*nx2+i2]); + if( xarea/min_area > AREA_RATIO_THRESH ) { #ifdef debug_test_create_xgrid - printf("(i2,j2)=(%d,%d), (i1,j1)=(%d,%d), xarea=%g\n", i2, j2, i1, j1, xarea); + printf("(i2,j2)=(%d,%d), (i1,j1)=(%d,%d), xarea=%g\n", i2, j2, i1, j1, xarea); #endif - xgrid_area[nxgrid] = xarea; - xgrid_clon[nxgrid] = 0; /*z1l: will be developed very soon */ - xgrid_clat[nxgrid] = 0; - i_in[nxgrid] = i1; - j_in[nxgrid] = j1; - i_out[nxgrid] = i2; - j_out[nxgrid] = j2; - ++nxgrid; - if(nxgrid > MAXXGRID) error_handler("nxgrid is greater than MAXXGRID, increase MAXXGRID"); - } + xgrid_area[nxgrid] = xarea; + xgrid_clon[nxgrid] = 0; /*z1l: will be developed very soon */ + xgrid_clat[nxgrid] = 0; + i_in[nxgrid] = i1; + j_in[nxgrid] = j1; + i_out[nxgrid] = i2; + j_out[nxgrid] = j2; + ++nxgrid; } } - } + } + } + } free(area1); diff --git a/tools/libfrencutils_acc/create_xgrid_acc.h b/tools/libfrencutils_acc/create_xgrid_acc.h index 98dc9d39..6047b1bc 100644 --- a/tools/libfrencutils_acc/create_xgrid_acc.h +++ b/tools/libfrencutils_acc/create_xgrid_acc.h @@ -20,77 +20,43 @@ #ifndef CREATE_XGRID_ACC_H_ #define CREATE_XGRID_ACC_H_ -#ifndef MAXXGRID -#define MAXXGRID 1e6 -#endif - -#define MV 50 -/* this value is small compare to earth area */ - -double poly_ctrlon_acc(const double lon[], const double lat[], int n, double clon); - -double poly_ctrlat_acc(const double lon[], const double lat[], int n); - -double box_ctrlon_acc(double ll_lon, double ll_lat, double ur_lon, double ur_lat, double clon); - -double box_ctrlat_acc(double ll_lon, double ll_lat, double ur_lon, double ur_lat); - -int get_maxxgrid_acc(void); - -void get_grid_area_acc(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area); - -void get_grid_great_circle_area_acc(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area); - -//void get_grid_area_dimensionless(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area); - -void get_grid_area_no_adjust_acc(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area); - -int clip_acc(const double lon_in[], const double lat_in[], int n_in, double ll_lon, double ll_lat, - double ur_lon, double ur_lat, double lon_out[], double lat_out[]); - -void pimod_acc(double x[],int nn); - -int clip_2dx2d_acc(const double lon1_in[], const double lat1_in[], int n1_in, - const double lon2_in[], const double lat2_in[], int n2_in, - double lon_out[], double lat_out[]); - -int create_xgrid_1dx2d_order_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, const double *lon_in, - const double *lat_in, const double *lon_out, const double *lat_out, - const double *mask_in, int *i_in, int *j_in, int *i_out, - int *j_out, double *xgrid_area); - -int create_xgrid_1dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, - const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out, - const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out, - double *xgrid_area, double *xgrid_clon, double *xgrid_clat); - -int create_xgrid_2dx1d_order1_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, const double *lon_in, - const double *lat_in, const double *lon_out, const double *lat_out, - const double *mask_in, int *i_in, int *j_in, int *i_out, - int *j_out, double *xgrid_area); - -int create_xgrid_2dx1d_order2_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, - const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out, - const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out, - double *xgrid_area, double *xgrid_clon, double *xgrid_clat); - -int create_xgrid_2dx2d_order1_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, - const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out, - const double *mask_in, int *i_in, int *j_in, int *i_out, - int *j_out, double *xgrid_area); - -int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, - const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out, - const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out, - double *xgrid_area, double *xgrid_clon, double *xgrid_clat); - -int clip_2dx2d_great_circle_acc(const double x1_in[], const double y1_in[], const double z1_in[], int n1_in, - const double x2_in[], const double y2_in[], const double z2_in [], int n2_in, - double x_out[], double y_out[], double z_out[]); - -int create_xgrid_great_circle_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, - const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out, - const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out, - double *xgrid_area, double *xgrid_clon, double *xgrid_clat); +#include "globals_acc.h" + +int get_upbound_nxcells_2dx2d_acc(const int nlon_input_cells, const int nlat_input_cells, + const int nlon_output_cells, const int nlat_output_cells, + const int jlat_overlap_starts, const int jlat_overlap_ends, + const double *input_grid_lon, const double *input_grid_lat, + const double *output_grid_lon, const double *output_grid_lat, + const double *skip_input_cells, + const Grid_cells_struct_config *output_grid_cells, + int *approx_nxcells_per_ij1, int *ij2_start, int *ij2_end); + +int create_xgrid_2dx2d_order1_acc(const int nlon_input_cells, const int nlat_input_cells, + const int nlon_output_cells, const int nlat_output_cells, + const int jlat_overlap_starts, const int jlat_overlap_ends, + const double *input_grid_lon, const double *input_grid_lat, + const double *output_grid_lon, const double *output_grid_lat, + const int upbound_nxcells, const double *skip_input_cells, + const Grid_cells_struct_config *output_grid_cells, + int *approx_nxcells_per_ij1, int *ij2_start, int *ij2_end, + Interp_per_input_tile *interp_for_input_tile); + +int create_xgrid_2dx2d_order2_acc(const int nlon_input_cells, const int nlat_input_cells, + const int nlon_output_cells, const int nlat_output_cells, + const int jlat_overlap_starts, const int jlat_overlap_ends, + const double *input_grid_lon, const double *input_grid_lat, + const double *output_grid_lon, const double *output_grid_lat, + const int upbound_nxcells, const double *skip_input_cells, + const Grid_cells_struct_config *output_grid_cells, + int *approx_nxcells_per_ij1, int *ij2_start, int *ij2_end, + Interp_per_input_tile *interp_for_input_tile, double *readin_input_area); + +int create_xgrid_great_circle_acc(const int *nlon_input_cells, const int *nlat_input_cells, + const int *nlon_output_cells, const int *nlat_output_cells, + const double *input_grid_lon, const double *input_grid_lat, + const double *output_grid_lon, const double *output_grid_lat, + const double *skip_input_cells, + int *i_in, int *j_in, int *i_out, int *j_out, + double *xgrid_area, double *xgrid_clon, double *xgrid_clat); #endif diff --git a/tools/libfrencutils_acc/create_xgrid_utils_acc.c b/tools/libfrencutils_acc/create_xgrid_utils_acc.c new file mode 100644 index 00000000..c7f305c6 --- /dev/null +++ b/tools/libfrencutils_acc/create_xgrid_utils_acc.c @@ -0,0 +1,882 @@ +/*********************************************************************** + * GNU Lesser General Public License + * + * This file is part of the GFDL FRE NetCDF tools package (FRE-NCTools). + * + * FRE-NCtools is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or (at + * your option) any later version. + * + * FRE-NCtools is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FRE-NCTools. If not, see + * . + **********************************************************************/ +#include +#include +#include +#include +#include "general_utils_acc.h" +#include "create_xgrid_utils_acc.h" +#include "globals_acc.h" + +/******************************************************************************* +void get_grid_area(const int *nlon, const int *nlat, const double *lon, const double *lat, const double *area) + return the grid area. +*******************************************************************************/ +void get_grid_area_acc(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area) +{ + int nx, ny, nxp, i, j, n_in; + double x_in[20], y_in[20]; + + nx = *nlon; + ny = *nlat; + nxp = nx + 1; + + for(j=0; j M_PI) dphi = dphi - 2.0*M_PI; + if(dphi < -M_PI) dphi = dphi + 2.0*M_PI; + dphi1 = phi1 - clon_in; + if( dphi1 > M_PI) dphi1 -= 2.0*M_PI; + if( dphi1 <-M_PI) dphi1 += 2.0*M_PI; + dphi2 = phi2 -clon_in; + if( dphi2 > M_PI) dphi2 -= 2.0*M_PI; + if( dphi2 <-M_PI) dphi2 += 2.0*M_PI; + + if(fabs(dphi2 -dphi1) < M_PI) *ctrlon -= dphi * (dphi1*f1+dphi2*f2)/2.0; + else { + fac = -M_PI; if(dphi1 > 0.0) fac = M_PI; + fint = f1 + (f2-f1)*(fac-dphi1)/fabs(dphi); + *ctrlon -= 0.5*dphi1*(dphi1-fac)*f1 - 0.5*dphi2*(dphi2+fac)*f2 + + 0.5*fac*(dphi1+dphi2)*fint; + } + } + *ctrlon = *ctrlon * RADIUS *RADIUS; + //return (ctrlon*RADIUS*RADIUS); +}; /* poly_ctrlon */ + +/*------------------------------------------------------------------------------ + double poly_ctrlat(const double x[], const double y[], int n) + This routine is used to calculate the latitude of the centroid + Reference: First- and Second-Order Conservative Remapping Schemes for Grids in + Spherical Coordinates, P. Jones, Monthly Weather Review, 1998, vol127, p2204 + The following is an implementation of equation (13) in the above paper: + \int lat.dA = \int_c [-cos(lat)-lat sin(lat)] dlon + It assumes the sides of the spherical polygons are line segments with tangent + (lat2-lat1)/(lon2-lon1) between a pair of vertices in approximating the above + line integral along the sides of the polygon \int_c. + ---------------------------------------------------------------------------*/ +void poly_ctrlat_acc(const double *x, const double *y, int n, double *ctrlat) +{ + + *ctrlat = 0.0; + for (int i=0;i M_PI) dx = dx - 2.0*M_PI; + if(dx <= -M_PI) dx = dx + 2.0*M_PI; // flip sign for dx=-pi to fix huge value see comments in function poly_area + + if ( fabs(hdy)< SMALL_VALUE ) /* cheap area calculation along latitude */ + *ctrlat -= dx*(2*cos(avg_y) + lat2*sin(avg_y) - cos(lat1) ); + else + *ctrlat -= dx*( (sin(hdy)/hdy)*(2*cos(avg_y) + lat2*sin(avg_y)) - cos(lat1) ); + } + *ctrlat = *ctrlat * RADIUS*RADIUS; + //if(fabs(ctrlat) > HPI) printf("WARNING poly_ctrlat: Large values for ctrlat: %19.15f\n", ctrlat); + //return (ctrlat*RADIUS*RADIUS); + +}; /* poly_ctrlat */ + +/******************************************************************************* + Revise Sutherland-Hodgeman algorithm to find the vertices of the overlapping + between any two grid boxes. It return the number of vertices for the exchange grid. +*******************************************************************************/ +int clip_2dx2d_acc(const double lon1_in[], const double lat1_in[], int n1_in, + const double lon2_in[], const double lat2_in[], int n2_in, + double lon_out[], double lat_out[]) +{ + double lon_tmp[MV], lat_tmp[MV]; + double lon2_tmp[MV], lat2_tmp[MV]; + double x1_0, y1_0, x1_1, y1_1, x2_0, y2_0, x2_1, y2_1; + double dx1, dy1, dx2, dy2, determ, ds1, ds2; + int i_out, n_out, inside_last, inside, i1, i2; + int gttwopi=0; + /* clip polygon with each boundary of the polygon */ + /* We treat lon1_in/lat1_in as clip polygon and lon2_in/lat2_in as subject polygon */ + n_out = n1_in; + for(i1=0; i1TPI || lon_tmp[i1]<0.0) gttwopi = 1; + } + for(i2=0; i2 and + should not parallel to the line between and + may need to consider truncation error */ + dy1 = y1_1-y1_0; + dy2 = y2_1-y2_0; + dx1 = x1_1-x1_0; + dx2 = x2_1-x2_0; + ds1 = y1_0*x1_1 - y1_1*x1_0; + ds2 = y2_0*x2_1 - y2_1*x2_0; + determ = dy2*dx1 - dy1*dx2; + if(fabs(determ) < EPSLN30) { + printf("ERROR the line between and should not parallel to " + "the line between and \n"); + } + lon_out[i_out] = (dx2*ds1 - dx1*ds2)/determ; + lat_out[i_out++] = (dy2*ds1 - dy1*ds2)/determ; + } + if(inside) { + lon_out[i_out] = x1_1; + lat_out[i_out++] = y1_1; + } + x1_0 = x1_1; + y1_0 = y1_1; + inside_last = inside; + } + if(!(n_out=i_out)) return 0; + for(i1=0; i1= max_x2+RANGE_CHECK_CRITERIA) return 0; + max_x1 = maxval_double_acc(n1_in, x1_in); + min_x2 = minval_double_acc(n2_in, x2_in); + if(min_x2 >= max_x1+RANGE_CHECK_CRITERIA) return 0; + + min_y1 = minval_double_acc(n1_in, y1_in); + max_y2 = maxval_double_acc(n2_in, y2_in); + if(min_y1 >= max_y2+RANGE_CHECK_CRITERIA) return 0; + max_y1 = maxval_double_acc(n1_in, y1_in); + min_y2 = minval_double_acc(n2_in, y2_in); + if(min_y2 >= max_y1+RANGE_CHECK_CRITERIA) return 0; + + min_z1 = minval_double_acc(n1_in, z1_in); + max_z2 = maxval_double_acc(n2_in, z2_in); + if(min_z1 >= max_z2+RANGE_CHECK_CRITERIA) return 0; + max_z1 = maxval_double_acc(n1_in, z1_in); + min_z2 = minval_double_acc(n2_in, z2_in); + if(min_z2 >= max_z1+RANGE_CHECK_CRITERIA) return 0; + + rewindList_acc(); + + grid1List = getNext_acc(); + grid2List = getNext_acc(); + intersectList = getNext_acc(); + polyList = getNext_acc(); + + /* insert points into SubjList and ClipList */ + for(i1=0; i1isInside = 1; + else + temp->isInside = 0; + temp = getNextNode_acc(temp); + } + + /* check if grid2List is inside grid1List */ + temp = grid2List; + while(temp) { + if(insidePolygon_acc(temp, grid1List)) + temp->isInside = 1; + else + temp->isInside = 0; + temp = getNextNode_acc(temp); + } + + /* make sure the grid box is clockwise */ + + /*make sure each polygon is convex, which is equivalent that the great_circle_area is positive */ + if( gridArea_acc(grid1List) <= 0 ) + printf("ERROR create_xgrid.c(clip_2dx2d_great_circle): grid box 1 is not convex\n"); + if( gridArea_acc(grid2List) <= 0 ) + printf("ERROR create_xgrid.c(clip_2dx2d_great_circle): grid box 2 is not convex\n"); + + /* get the coordinates from grid1List and grid2List. + Please not npts1 might not equal n1_in, npts2 might not equal n2_in because of pole + */ + + temp = grid1List; + for(i1=0; i1Next_acc; + } + temp = grid2List; + for(i2=0; i2Next_acc; + } + + firstIntersect=getNext_acc(); + curIntersect = getNext_acc(); + + /* first find all the intersection points */ + nintersect = 0; + for(i1=0; i1 1) { + getFirstInbound_acc(intersectList, firstIntersect); + if(firstIntersect->initialized) { + has_inbound = 1; + } + } + + /* when has_inbound == 0, get the grid1List and grid2List */ + if( !has_inbound && nintersect > 1) { + setInbound_acc(intersectList, grid1List); + getFirstInbound_acc(intersectList, firstIntersect); + if(firstIntersect->initialized) has_inbound = 1; + } + + /* if has_inbound = 1, find the overlapping */ + n_out = 0; + + if(has_inbound) { + maxiter1 = nintersect; + temp1 = getNode_acc(grid1List, *firstIntersect); + if( temp1 == NULL) { + double lon[10], lat[10]; + int i; + xyz2latlon_acc(n1_in, x1_in, y1_in, z1_in, lon, lat); + for(i=0; i< n1_in; i++) printf("lon1 = %g, lat1 = %g\n", lon[i]*R2D, lat[i]*R2D); + printf("\n"); + xyz2latlon_acc(n2_in, x2_in, y2_in, z2_in, lon, lat); + for(i=0; i< n2_in; i++) printf("lon2 = %g, lat2 = %g\n", lon[i]*R2D, lat[i]*R2D); + printf("\n"); + + printf("ERROR firstIntersect is not in the grid1List\n"); + } + addNode_acc(polyList, *firstIntersect); + nintersect--; + + /* Loop over the grid1List and grid2List to find again the firstIntersect */ + curList = grid1List; + curListNum = 0; + + /* Loop through curList to find the next intersection, the loop will end + when come back to firstIntersect + */ + copyNode_acc(curIntersect, *firstIntersect); + iter1 = 0; + found1 = 0; + + while( iter1 < maxiter1 ) { + /* find the curIntersect in curList and get the next intersection points */ + temp1 = getNode_acc(curList, *curIntersect); + temp2 = temp1->Next_acc; + if( temp2 == NULL ) temp2 = curList; + + maxiter2 = length_acc(curList); + found2 = 0; + iter2 = 0; + /* Loop until find the next intersection */ + while( iter2 < maxiter2 ) { + int temp2IsIntersect; + + temp2IsIntersect = 0; + if( isIntersect_acc( *temp2 ) ) { /* copy the point and switch to the grid2List */ + struct Node_acc *temp3; + + /* first check if temp2 is the firstIntersect */ + if( sameNode_acc( *temp2, *firstIntersect) ) { + found1 = 1; + break; + } + + temp3 = temp2->Next_acc; + if( temp3 == NULL) temp3 = curList; + if( temp3 == NULL) printf("ERROR creat_xgrid.c: temp3 can not be NULL\n"); + found2 = 1; + /* if next node is inside or an intersection, + need to keep on curList + */ + temp2IsIntersect = 1; + if( isIntersect_acc(*temp3) || (temp3->isInside == 1) ) found2 = 0; + } + if(found2) { + copyNode_acc(curIntersect, *temp2); + break; + } + else { + addNode_acc(polyList, *temp2); + if(temp2IsIntersect) { + nintersect--; + } + } + temp2 = temp2->Next_acc; + if( temp2 == NULL ) temp2 = curList; + iter2 ++; + } // while( iter2 0) printf("ERROR After clipping, nintersect should be 0\n"); + + /* copy the polygon to x_out, y_out, z_out */ + temp1 = polyList; + while (temp1 != NULL) { + getCoordinate_acc(*temp1, x_out+n_out, y_out+n_out, z_out+n_out); + temp1 = temp1->Next_acc; + n_out++; + } + + /* if(n_out < 3) error_handler(" The clipped region has < 3 vertices"); */ + if( n_out < 3) n_out = 0; + } //if(inbound) + + /* check if grid1 is inside grid2 */ + if(n_out==0){ + /* first check number of points in grid1 is inside grid2 */ + int n, n1in2; + /* One possible is that grid1List is inside grid2List */ + n1in2 = 0; + temp = grid1List; + while(temp) { + if(temp->intersect != 1) { + if( temp->isInside == 1) n1in2++; + } + temp = getNextNode_acc(temp); + } + if(npts1==n1in2) { /* grid1 is inside grid2 */ + n_out = npts1; + n = 0; + temp = grid1List; + while( temp ) { + getCoordinate_acc(*temp, &x_out[n], &y_out[n], &z_out[n]); + n++; + temp = getNextNode_acc(temp); + } + } + if(n_out>0) return n_out; + } + + /* check if grid2List is inside grid1List */ + if(n_out ==0){ + int n, n2in1; + + temp = grid2List; + n2in1 = 0; + while(temp) { + if(temp->intersect != 1) { + if( temp->isInside == 1) n2in1++; + } + temp = getNextNode_acc(temp); + } + + if(npts2==n2in1) { /* grid2 is inside grid1 */ + n_out = npts2; + n = 0; + temp = grid2List; + while( temp ) { + getCoordinate_acc(*temp, &x_out[n], &y_out[n], &z_out[n]); + n++; + temp = getNextNode_acc(temp); + } + + } + } + + return n_out; +} + +void get_grid_cell_struct_acc( const int nlon, const int nlat, const Grid_config *output_grid, + Grid_cells_struct_config *grid_cells ) +{ + + double *lon = output_grid->lonc; + double *lat = output_grid->latc; + + int ncells=nlon*nlat; + int npts=(nlon+1)*(nlat+1); + + grid_cells->lon_min = (double *)malloc(ncells*sizeof(double)); + grid_cells->lon_max = (double *)malloc(ncells*sizeof(double)); + grid_cells->lat_min = (double *)malloc(ncells*sizeof(double)); + grid_cells->lat_max = (double *)malloc(ncells*sizeof(double)); + grid_cells->lon_cent = (double *)malloc(ncells*sizeof(double)); + grid_cells->area = (double *)malloc(ncells*sizeof(double)); + grid_cells->nvertices = (int *)malloc(ncells*sizeof(int)); + grid_cells->lon_vertices = (double **)malloc(ncells*sizeof(double)); + grid_cells->lat_vertices = (double **)malloc(ncells*sizeof(double)); + for(int icell=0 ; icelllat_vertices[icell] = (double *)malloc(MAX_V*sizeof(double)); + grid_cells->lon_vertices[icell] = (double *)malloc(MAX_V*sizeof(double)); + } + +#pragma acc enter data create(grid_cells[:1]) +#pragma acc enter data create(grid_cells->lon_min[:ncells], grid_cells->lon_max[:ncells], \ + grid_cells->lat_min[:ncells], grid_cells->lat_max[:ncells], \ + grid_cells->lon_cent[:ncells], grid_cells->nvertices[:ncells],\ + grid_cells->area[:ncells]) +#pragma acc enter data create(grid_cells->lon_vertices[:ncells][:MAX_V], \ + grid_cells->lat_vertices[:ncells][:MAX_V]) + +#pragma acc data present(grid_cells[:1], lon[:npts], lat[:npts]) +#pragma acc parallel loop independent + for(int icell=0; icelllat_min[icell] = minval_double_acc(4, lat_vertices); + grid_cells->lat_max[icell] = maxval_double_acc(4, lat_vertices); + + nvertices = fix_lon_acc(lon_vertices, lat_vertices, 4, M_PI); + grid_cells->nvertices[icell] = nvertices; + + if(nvertices>MAX_V) printf("ERROR get_cell_minmaxavg_latlons: number of cell vertices is greater than MAX_V\n"); + grid_cells->lon_min[icell] = minval_double_acc(nvertices, lon_vertices); + grid_cells->lon_max[icell] = maxval_double_acc(nvertices, lon_vertices); + grid_cells->lon_cent[icell] = avgval_double_acc(nvertices, lon_vertices); + + grid_cells->area[icell] = poly_area_acc(lon_vertices, lat_vertices, nvertices); + + for(int ivertex=0 ; ivertexlon_vertices[icell][ivertex] = lon_vertices[ivertex]; + grid_cells->lat_vertices[icell][ivertex] = lat_vertices[ivertex]; + } + } + +} + +void free_grid_cell_struct_acc( const int ncells, Grid_cells_struct_config *grid_cells) +{ + + for(int icell=0 ; icelllon_vertices[icell]) + } + + for(int icell=0 ; icelllat_vertices[icell]) + } + +#pragma acc exit data delete( grid_cells->lon_vertices, \ + grid_cells->lat_vertices, \ + grid_cells->lon_min, \ + grid_cells->lon_max, \ + grid_cells->lon_cent, \ + grid_cells->lat_max, \ + grid_cells->lat_min, \ + grid_cells->nvertices, \ + grid_cells->area) +#pragma acc exit data delete(grid_cells) + + for(int icell=0 ; icelllon_vertices[icell]); + free(grid_cells->lat_vertices[icell]); + } + free(grid_cells->lon_min); grid_cells->lon_min = NULL; + free(grid_cells->lon_max); grid_cells->lon_max = NULL; + free(grid_cells->lon_cent); grid_cells->lon_cent = NULL; + free(grid_cells->lat_min); grid_cells->lat_min = NULL; + free(grid_cells->lat_max); grid_cells->lat_max = NULL; + free(grid_cells->area); grid_cells->area = NULL; + free(grid_cells->nvertices); grid_cells->nvertices=NULL; + //grid_cells->lon_vertices = NULL; + //grid_cells->lat_vertices = NULL; +} + + +void get_cell_vertices_acc( const int icell, const int nlon, const double *lon, const double *lat, double *x, double *y ) +{ + + int i, j; + int n0, n1, n2, n3; + + i = icell%nlon; + j = icell/nlon; + n0 = j*(nlon+1)+i; + n1 = j*(nlon+1)+i+1; + n2 = (j+1)*(nlon+1)+i+1; + n3 = (j+1)*(nlon+1)+i; + + x[0] = lon[n0]; y[0] = lat[n0]; + x[1] = lon[n1]; y[1] = lat[n1]; + x[2] = lon[n2]; y[2] = lat[n2]; + x[3] = lon[n3]; y[3] = lat[n3]; + +} + +void create_upbound_nxcells_arrays_on_device_acc(const int n, int **approx_nxcells_per_ij1, + int **ij2_start, int **ij2_end) +{ + + int *p_approx_nxcells_per_ij1; + int *p_ij2_start; + int *p_ij2_end; + + *approx_nxcells_per_ij1 = (int *)malloc(n*sizeof(int)); + *ij2_start = (int *)malloc(n*sizeof(int)); + *ij2_end = (int *)malloc(n*sizeof(int)); + + p_approx_nxcells_per_ij1 = *approx_nxcells_per_ij1; + p_ij2_start = *ij2_start; + p_ij2_end = *ij2_end; + +#pragma acc enter data copyin(p_approx_nxcells_per_ij1[:n], \ + p_ij2_start[:n], \ + p_ij2_end[:n]) + +} + +void free_upbound_nxcells_arrays_acc( const int n, int **approx_nxcells_per_ij1, + int **ij2_start, int **ij2_end) +{ + int *p_approx_nxcells_per_ij1; + int *p_ij2_start ; + int *p_ij2_end; + + p_approx_nxcells_per_ij1 = *approx_nxcells_per_ij1; + p_ij2_start = *ij2_start; + p_ij2_end = *ij2_end; + +#pragma acc exit data delete(p_approx_nxcells_per_ij1[:n], \ + p_ij2_start[:n], \ + p_ij2_end[:n]) + + free(*approx_nxcells_per_ij1); *approx_nxcells_per_ij1 = NULL; + free(*ij2_start) ; *ij2_start = NULL; + free(*ij2_end) ; *ij2_end = NULL; +} + +void copy_data_to_interp_on_device_acc(const int nxcells, const int input_ncells, const int upbound_nxcells, + int *xcells_per_ij1, double *xcell_dclon, double *xcell_dclat, + int *approx_xcells_per_ij1, int *parent_input_index, int *parent_output_index, + double *xcell_areas, Interp_per_input_tile *interp_for_input_tile) +{ + int copy_xcentroid = ( xcell_dclon[0] != -99.99 ) ? 1 : 0; //true=1; false=0 + interp_for_input_tile->nxcells = nxcells; + + if(nxcells>0) { + + interp_for_input_tile->input_parent_cell_index = (int *)malloc(nxcells*sizeof(int)); + interp_for_input_tile->output_parent_cell_index = (int *)malloc(nxcells*sizeof(int)); + interp_for_input_tile->xcell_area = (double *)malloc(nxcells*sizeof(double)); + if(copy_xcentroid) { + interp_for_input_tile->dcentroid_lon = (double *)malloc(nxcells*sizeof(double)); + interp_for_input_tile->dcentroid_lat = (double *)malloc(nxcells*sizeof(double)); + } + +#pragma acc enter data copyin(interp_for_input_tile[:1]) +#pragma acc enter data create(interp_for_input_tile->input_parent_cell_index[:nxcells], \ + interp_for_input_tile->output_parent_cell_index[:nxcells], \ + interp_for_input_tile->xcell_area[:nxcells]) +#pragma acc enter data if(copy_xcentroid) create(interp_for_input_tile->dcentroid_lon[:nxcells], \ + interp_for_input_tile->dcentroid_lat[:nxcells]) + +#pragma acc data present(xcells_per_ij1[:input_ncells], approx_xcells_per_ij1[:input_ncells], \ + parent_input_index[:upbound_nxcells], \ + parent_output_index[:upbound_nxcells], \ + xcell_areas[:upbound_nxcells], interp_for_input_tile[:1]) +#pragma acc parallel loop independent async(0) + for(int ij1=0 ; ij1input_parent_cell_index[xcells_before_ij1+i] = parent_input_index[approx_xcells+i]; + interp_for_input_tile->output_parent_cell_index[xcells_before_ij1+i] = parent_output_index[approx_xcells+i]; + interp_for_input_tile->xcell_area[xcells_before_ij1+i] = xcell_areas[approx_xcells+i]; + } + } + + if(copy_xcentroid==1) { +#pragma acc data present(xcells_per_ij1[:input_ncells], approx_xcells_per_ij1[:input_ncells], \ + parent_input_index[:upbound_nxcells], \ + parent_output_index[:upbound_nxcells], \ + xcell_areas[:upbound_nxcells], interp_for_input_tile[:1], \ + xcell_dclon[:upbound_nxcells], xcell_dclat[:upbound_nxcells]) +#pragma acc parallel loop independent async(1) + for(int ij1=0 ; ij1dcentroid_lon[xcells_before_ij1+i] = xcell_dclon[approx_xcells+i]; + interp_for_input_tile->dcentroid_lat[xcells_before_ij1+i] = xcell_dclat[approx_xcells+i]; + } + } + } + +#pragma acc wait + + }//if nxcells>0 + + } diff --git a/tools/libfrencutils_acc/create_xgrid_utils_acc.h b/tools/libfrencutils_acc/create_xgrid_utils_acc.h new file mode 100644 index 00000000..d5c310a5 --- /dev/null +++ b/tools/libfrencutils_acc/create_xgrid_utils_acc.h @@ -0,0 +1,65 @@ +/*********************************************************************** + * GNU Lesser General Public License + * + * This file is part of the GFDL FRE NetCDF tools package (FRE-NCTools). + * + * FRE-NCtools is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or (at + * your option) any later version. + * + * FRE-NCtools is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FRE-NCTools. If not, see + * . + **********************************************************************/ +#ifndef CREATE_XGRID_UTILS_ACC_H_ +#define CREATE_XGRID_UTILS_ACC_H_ + +#include "globals_acc.h" + +#define MV 50 +/* this value is small compare to earth area */ + +void get_grid_area_acc(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area); + +void get_grid_great_circle_area_acc(const int *nlon, const int *nlat, const double *lon, const double *lat, double *area); + +#pragma acc routine seq +void poly_ctrlon_acc(const double *x, const double *y, int n, double clon_in, double *crtlon); + +#pragma acc routine seq +void poly_ctrlat_acc(const double *x, const double *y, int n, double *crtlat); + +#pragma acc routine seq +int clip_2dx2d_acc(const double lon1_in[], const double lat1_in[], int n1_in, + const double lon2_in[], const double lat2_in[], int n2_in, + double lon_out[], double lat_out[]); + +int clip_2dx2d_great_circle_acc(const double x1_in[], const double y1_in[], const double z1_in[], int n1_in, + const double x2_in[], const double y2_in[], const double z2_in [], int n2_in, + double x_out[], double y_out[], double z_out[]); + +void get_grid_cell_struct_acc( const int nlon, const int nlat, const Grid_config *output_grid, + Grid_cells_struct_config *grid_cells); + +void free_grid_cell_struct_acc( const int ncells, Grid_cells_struct_config *grid_cells); + +#pragma acc routine seq +void get_cell_vertices_acc( const int ij, const int nlon, const double *lon, const double *lat, double *x, double *y ); + +void create_upbound_nxcells_arrays_on_device_acc(const int n, int **approx_nxcells_per_ij1, + int **ij2_start, int **ij2_end); + +void free_upbound_nxcells_arrays_acc( const int n, int **approx_nxcells_per_ij1, + int **ij2_start, int **ij2_end); + +void copy_data_to_xgrid_on_device_acc(const int nxcells, const int input_ncells, const int upbound_nxcells, + int *xcells_per_ij1, double *xcell_clon, double *xcell_clat, + int *approx_xcells_per_ij1, int *parent_input_indices, int *parent_output_indices, + double *xcell_areas, Interp_per_input_tile *interp_for_input_tile); +#endif diff --git a/tools/libfrencutils_acc/general_utils_acc.c b/tools/libfrencutils_acc/general_utils_acc.c new file mode 100644 index 00000000..ffe8ac8e --- /dev/null +++ b/tools/libfrencutils_acc/general_utils_acc.c @@ -0,0 +1,1522 @@ +/*********************************************************************** + * GNU Lesser General Public License + * + * This file is part of the GFDL FRE NetCDF tools package (FRE-NCTools). + * + * FRE-NCtools is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or (at + * your option) any later version. + * + * FRE-NCtools is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FRE-NCTools. If not, see + * . + **********************************************************************/ + +/** + * \author Zhi Liang +*/ +#include +#include +#include +#include +#include "general_utils_acc.h" +#include "globals_acc.h" + +const double from_pole_threshold_rad_acc = 0.0174533; // 1.0 deg + +//reproduce siena was removed +int rotate_poly_flag_acc = 0; +double the_rotation_matrix_acc[3][3] = { 0 }; + +#pragma acc routine seq +void set_rotate_poly_true_acc(void){ + rotate_poly_flag_acc = 1; + set_the_rotation_matrix_acc(); +} + +struct Node_acc *nodeList_acc=NULL; +int curListPos_acc=0; + +#pragma acc declare copyin(from_pole_threshold_rad_acc) +#pragma acc declare copyin(rotate_poly_flag_acc) +#pragma acc declare copyin(nodeList_acc) +#pragma acc declare copyin(curListPos_acc) +#pragma acc declare copyin(the_rotation_matrix_acc) + +/********************************************************************* + + int nearest_index(double value, const double *array, int ia) + + return index of nearest data point within "array" corresponding to "value". + if "value" is outside the domain of "array" then nearest_index = 0 + or = size(array)-1 depending on whether array(0) or array(ia-1) is + closest to "value" + + Arguments: + value: arbitrary data...same units as elements in "array" + array: array of data points (must be monotonically increasing) + ia : size of array. + ********************************************************************/ +int nearest_index_acc(double value, const double *array, int ia) +{ + int index, i; + int keep_going; + + for(i=1; i array[ia-1]) index = ia-1; + else { + i=0; + keep_going = 1; + while (i < ia && keep_going) { + i = i+1; + if (value <= array[i]) { + index = i; + if (array[i]-value > value-array[i-1]) index = i-1; + keep_going = 0; + } + } + } + return index; + +} + +/******************************************************************************* + double maxval_double(int size, double *data) + get the maximum value of double array +*******************************************************************************/ +double maxval_double_acc(int size, const double *data) +{ + int n; + double maxval; + + maxval = data[0]; + for(n=1; n maxval ) maxval = data[n]; + } + + return maxval; + +} /* maxval_double */ + + +/******************************************************************************* + double minval_double(int size, double *data) + get the minimum value of double array +*******************************************************************************/ +double minval_double_acc(int size, const double *data) +{ + int n; + double minval; + + minval = data[0]; + for(n=1; n M_PI) + dx = dx - 2.0 * M_PI; + if (dx < -M_PI) + dx = dx + 2.0 * M_PI; + /*Sides that go through a pole contribute PI regardless of their direction and extent.*/ + if (fabs(dx + M_PI) < SMALL_VALUE || fabs(dx - M_PI) < SMALL_VALUE) { + area += M_PI; + continue; // next i + } + /* + We have to be careful in implementing sin(0.5*(lat2-lat1))/(0.5*(lat2-lat1)) + in the limit of lat1=lat2 where it becomes 1. Note that in that limit we get the well known formula + for the area of a section of sphere bounded by two longitudes and two latitude circles. + */ + if (fabs(lat1 - lat2) < SMALL_VALUE) /* cheap area calculation along latitude */ + area -= dx * sin(0.5 * (lat1 + lat2)); + else { + dy = 0.5 * (lat1 - lat2); + dat = sin(dy) / dy; + area -= dx * sin(0.5 * (lat1 + lat2)) * dat; + } + } + + + if (fabs(area) > HPI) printf("WARNING poly_area: Large values for area: %19.15f\n", area); + if (area < 0) return -area * RADIUS * RADIUS; + else return area * RADIUS * RADIUS; +} /* poly_area_main */ + +/* + Calculate the area of a polygon with the original poly_area function, + exept that for those polygons that are polar, if the global poly_are_flag + is set, the area is calculated by first rotating a copy of the polygon away + from the pole and calculating the area of the rotated polygon instead. + Polar means having any of its verices cloase to a pole, or an edge crossing + a pole. + + This routine is here merely for improving the accuracy of the are calculation + in the given conditions. + TODO: The tiling error reported by make_coupler mosaic may be non-zero when + using this feature. This may be developped in the future. +*/ +double poly_area_acc(const double xo[], const double yo[], int n) { + double area_pa = 0.0; + double area_par = 0.0; + int pole = 0; + int crosses = 0; + + double xr[8]; // rotated lon + double yr[8]; // rotated lat + + if (rotate_poly_flag_acc == 0) { + area_pa = poly_area_main_acc(xo, yo, n); + return area_pa; + } + else { + // anything near enough to the pole gets rotated + pole = is_near_pole_acc(yo, n); + crosses = crosses_pole_acc(xo, n); + if (crosses == 1 && pole == 0) { + printf("ERROR poly_area: crosses == 1 && pole == 0\n"); + } + + if (pole == 1) { + if (n > 8) { + printf("ERROR poly_area: n > 8. n=%d,n\n"); + } + rotate_poly_acc(xo, yo, n, xr, yr); + + int pole2 = is_near_pole_acc(yr, n); + if (pole2 == 1) { + printf("ERROR poly_area: pole2 == 1\n"); + } + area_par = poly_area_main_acc(xr, yr, n); + } else { + area_pa = poly_area_main_acc(xo, yo, n); + } + + if (pole == 1) { + return area_par; + } else { + return area_pa; + } + } +} + +int delete_vtx_acc(double x[], double y[], int n, int n_del) +{ + for (;n_del=n_ins;i--) { + x[i+1] = x[i]; + y[i+1] = y[i]; + } + + x[n_ins] = lon_in; + y[n_ins] = lat_in; + return (n+1); +} /* insert_vtx */ + +void v_print_acc(double x[], double y[], int n) +{ + int i; + + for (i=0;i=HPI-TOLERANCE) pole = 1; + if (0&&pole) { + printf("fixing pole cell\n"); + v_print_acc(x, y, nn); + printf("---------"); + } + + /* all pole points must be paired */ + /* The reason is poly_area() function needs a contribution equal to the angle (in radians) + between the sides that connect to the pole. */ + for (i=0;i=HPI-TOLERANCE) { + int im=(i+nn-1)%nn, ip=(i+1)%nn; + + if (y[im]==y[i] && y[ip]==y[i]) { + nn = delete_vtx_acc(x, y, nn, i); + i--; + } + else if (y[im]!=y[i] && y[ip]!=y[i]) { + nn = insert_vtx_acc(x, y, nn, i, x[i], y[i]); + i++; + } + } + /* first of pole pair has longitude of previous vertex */ + /* second of pole pair has longitude of subsequent vertex */ + for (i=0;i=HPI-TOLERANCE) { + int im=(i+nn-1)%nn, ip=(i+1)%nn; + + if (y[im]!=y[i]) x[i] = x[im]; + if (y[ip]!=y[i]) x[i] = x[ip]; + } + + /*If a polygon side passes through a Pole insert twin vertices at the Pole*/ + /*A fix is also directly applied to poly_area to handle this case.*/ + for (i=0;i M_PI) dx = dx - TPI; + x_sum += (x[i] = x[i-1] + dx); + } + + dx = (x_sum/nn)-tlon; + if (dx < -M_PI) for (i=0;i M_PI) for (i=0;i angle + \ + \ + p2 + -----------------------------------------------------------------------------*/ +double spherical_angle_acc(const double *v1, const double *v2, const double *v3) +{ + double angle; + double px, py, pz, qx, qy, qz, ddd; + + /* vector product between v1 and v2 */ + px = v1[1]*v2[2] - v1[2]*v2[1]; + py = v1[2]*v2[0] - v1[0]*v2[2]; + pz = v1[0]*v2[1] - v1[1]*v2[0]; + /* vector product between v1 and v3 */ + qx = v1[1]*v3[2] - v1[2]*v3[1]; + qy = v1[2]*v3[0] - v1[0]*v3[2]; + qz = v1[0]*v3[1] - v1[1]*v3[0]; + + ddd = (px*px+py*py+pz*pz)*(qx*qx+qy*qy+qz*qz); + if ( ddd <= 0.0 ) angle = 0. ; + else { + ddd = (px*qx+py*qy+pz*qz) / sqrt(ddd); + if( fabs(ddd-1) < EPSLN30 ) ddd = 1; + if( fabs(ddd+1) < EPSLN30 ) ddd = -1; + if ( ddd>1. || ddd<-1. ) { + /*FIX (lmh) to correctly handle co-linear points (angle near pi or 0) */ + if (ddd < 0.) angle = M_PI; + else angle = 0.; + } + else + angle = acos( ddd ); + } + + return angle; +} /* spherical_angle */ + +/*---------------------------------------------------------------------- + void vect_cross(e, p1, p2) + Perform cross products of 3D vectors: e = P1 X P2 + -------------------------------------------------------------------*/ +void vect_cross_acc(const double *p1, const double *p2, double *e ) +{ + + e[0] = p1[1]*p2[2] - p1[2]*p2[1]; + e[1] = p1[2]*p2[0] - p1[0]*p2[2]; + e[2] = p1[0]*p2[1] - p1[1]*p2[0]; + +} /* vect_cross */ + + +/*---------------------------------------------------------------------- + double* vect_cross(p1, p2) + return cross products of 3D vectors: = P1 X P2 + -------------------------------------------------------------------*/ +double dot_acc(const double *p1, const double *p2) +{ + + return( p1[0]*p2[0] + p1[1]*p2[1] + p1[2]*p2[2] ); + +} + +double metric_acc(const double *p) { + return (sqrt(p[0]*p[0] + p[1]*p[1]+p[2]*p[2]) ); +} + +/* Intersect a line and a plane + Intersects between the plane ( three points ) (entries in counterclockwise order) + and the line determined by the endpoints l1 and l2 (t=0.0 at l1 and t=1.0 at l2) + returns true if the two intersect and the output variables are valid + outputs p containing the coordinates in the tri and t the coordinate in the line + of the intersection. + NOTE: the intersection doesn't have to be inside the tri or line for this to return true +*/ +int intersect_tri_with_line_acc(const double *plane, const double *l1, const double *l2, double *p, + double *t) +{ + + double M[3*3], inv_M[3*3]; + double V[3]; + double X[3]; + int is_invert=0; + + const double *pnt0=plane; + const double *pnt1=plane+3; + const double *pnt2=plane+6; + + /* To do intersection just solve the set of linear equations for both + Setup M + */ + + M[0]=l1[0]-l2[0]; M[1]=pnt1[0]-pnt0[0]; M[2]=pnt2[0]-pnt0[0]; + M[3]=l1[1]-l2[1]; M[4]=pnt1[1]-pnt0[1]; M[5]=pnt2[1]-pnt0[1]; + M[6]=l1[2]-l2[2]; M[7]=pnt1[2]-pnt0[2]; M[8]=pnt2[2]-pnt0[2]; + /* Invert M */ + is_invert = invert_matrix_3x3_acc(M,inv_M); + if (!is_invert) return 0; + + /* Set variable holding vector */ + V[0]=l1[0]-pnt0[0]; + V[1]=l1[1]-pnt0[1]; + V[2]=l1[2]-pnt0[2]; + + /* Calculate solution */ + mult_acc(inv_M, V, X); + + /* Get answer out */ + *t=X[0]; + p[0]=X[1]; + p[1]=X[2]; + + return 1; +} + +void mult_acc(double m[], double v[], double out_v[]) +{ + + out_v[0]=m[0]*v[0]+m[1]*v[1]+m[2]*v[2]; + out_v[1]=m[3]*v[0]+m[4]*v[1]+m[5]*v[2]; + out_v[2]=m[6]*v[0]+m[7]*v[1]+m[8]*v[2]; + +} + +/* returns 1 if matrix is inverted, 0 otherwise */ +int invert_matrix_3x3_acc(double m[], double m_inv[]) +{ + + const double det = m[0] * (m[4]*m[8] - m[5]*m[7]) + -m[1] * (m[3]*m[8] - m[5]*m[6]) + +m[2] * (m[3]*m[7] - m[4]*m[6]); + + if (fabs(det) < EPSLN15 ) return 0; + + const double deti = 1.0/det; + + m_inv[0] = (m[4]*m[8] - m[5]*m[7]) * deti; + m_inv[1] = (m[2]*m[7] - m[1]*m[8]) * deti; + m_inv[2] = (m[1]*m[5] - m[2]*m[4]) * deti; + + m_inv[3] = (m[5]*m[6] - m[3]*m[8]) * deti; + m_inv[4] = (m[0]*m[8] - m[2]*m[6]) * deti; + m_inv[5] = (m[2]*m[3] - m[0]*m[5]) * deti; + + m_inv[6] = (m[3]*m[7] - m[4]*m[6]) * deti; + m_inv[7] = (m[1]*m[6] - m[0]*m[7]) * deti; + m_inv[8] = (m[0]*m[4] - m[1]*m[3]) * deti; + + return 1; +} + + + +void rewindList_acc(void) +{ + int n; + + curListPos_acc = 0; + if(!nodeList_acc) nodeList_acc = (struct Node_acc *)malloc(MAXNODELIST*sizeof(struct Node_acc)); + for(n=0; n MAXNODELIST) printf("ERROR getNext_acc: curListPos_acc >= MAXNODELIST\n"); + + return (temp); +} + + +void initNode_acc(struct Node_acc *node) +{ + node->x = 0; + node->y = 0; + node->z = 0; + node->u = 0; + node->intersect = 0; + node->inbound = 0; + node->isInside = 0; + node->Next_acc = NULL; + node->initialized=0; + +} + +void addEnd_acc(struct Node_acc *list, double x, double y, double z, int intersect, double u, int inbound, int inside) +{ + + struct Node_acc *temp=NULL; + + if(list == NULL) printf("ERROR addEnd: list is NULL\n"); + + if(list->initialized) { + + /* (x,y,z) might already in the list when intersect is true and u=0 or 1 */ + temp = list; + while (temp) { + if(samePoint_acc(temp->x, temp->y, temp->z, x, y, z)) return; + temp=temp->Next_acc; + } + temp = list; + while(temp->Next_acc) + temp=temp->Next_acc; + + /* Append at the end of the list. */ + temp->Next_acc = getNext_acc(); + temp = temp->Next_acc; + } + else { + temp = list; + } + + temp->x = x; + temp->y = y; + temp->z = z; + temp->u = u; + temp->intersect = intersect; + temp->inbound = inbound; + temp->initialized=1; + temp->isInside = inside; +} + +/* return 1 if the point (x,y,z) is added in the list, return 0 if it is already in the list */ + +int addIntersect_acc(struct Node_acc *list, double x, double y, double z, int intersect, double u1, double u2, int inbound, + int is1, int ie1, int is2, int ie2) +{ + + double u1_cur, u2_cur; + int i1_cur, i2_cur; + struct Node_acc *temp=NULL; + + if(list == NULL) printf("ERROR addEnd: list is NULL\n"); + + /* first check to make sure this point is not in the list */ + u1_cur = u1; + i1_cur = is1; + u2_cur = u2; + i2_cur = is2; + if(u1_cur == 1) { + u1_cur = 0; + i1_cur = ie1; + } + if(u2_cur == 1) { + u2_cur = 0; + i2_cur = ie2; + } + + if(list->initialized) { + temp = list; + while(temp) { + if( temp->u == u1_cur && temp->subj_index == i1_cur) return 0; + if( temp->u_clip == u2_cur && temp->clip_index == i2_cur) return 0; + if( !temp->Next_acc ) break; + temp=temp->Next_acc; + } + + /* Append at the end of the list. */ + temp->Next_acc = getNext_acc(); + temp = temp->Next_acc; + } + else { + temp = list; + } + + temp->x = x; + temp->y = y; + temp->z = z; + temp->intersect = intersect; + temp->inbound = inbound; + temp->initialized=1; + temp->isInside = 0; + temp->u = u1_cur; + temp->subj_index = i1_cur; + temp->u_clip = u2_cur; + temp->clip_index = i2_cur; + + return 1; +} + + +int length_acc(struct Node_acc *list) +{ + struct Node_acc *cur_ptr=NULL; + int count=0; + + cur_ptr=list; + + while(cur_ptr) + { + if(cur_ptr->initialized ==0) break; + cur_ptr=cur_ptr->Next_acc; + count++; + } + return(count); +} + +/* two points are the same if there are close enough */ +int samePoint_acc(double x1, double y1, double z1, double x2, double y2, double z2) +{ + if( fabs(x1-x2) > EPSLN10 || fabs(y1-y2) > EPSLN10 || fabs(z1-z2) > EPSLN10 ) + return 0; + else + return 1; +} + + + +int sameNode_acc(struct Node_acc node1, struct Node_acc node2) +{ + if( node1.x == node2.x && node1.y == node2.y && node1.z==node2.z ) + return 1; + else + return 0; +} + + +void addNode_acc(struct Node_acc *list, struct Node_acc inNode_acc) +{ + + addEnd_acc(list, inNode_acc.x, inNode_acc.y, inNode_acc.z, inNode_acc.intersect, inNode_acc.u, inNode_acc.inbound, inNode_acc.isInside); + +} + +struct Node_acc *getNode_acc(struct Node_acc *list, struct Node_acc inNode_acc) +{ + struct Node_acc *thisNode_acc=NULL; + struct Node_acc *temp=NULL; + + temp = list; + while( temp ) { + if( sameNode_acc( *temp, inNode_acc ) ) { + thisNode_acc = temp; + temp = NULL; + break; + } + temp = temp->Next_acc; + } + + return thisNode_acc; +} + +struct Node_acc *getNextNode_acc(struct Node_acc *list) +{ + return list->Next_acc; +} + +void copyNode_acc(struct Node_acc *node_out, struct Node_acc node_in) +{ + + node_out->x = node_in.x; + node_out->y = node_in.y; + node_out->z = node_in.z; + node_out->u = node_in.u; + node_out->intersect = node_in.intersect; + node_out->inbound = node_in.inbound; + node_out->Next_acc = NULL; + node_out->initialized = node_in.initialized; + node_out->isInside = node_in.isInside; +} + +void printNode_acc(struct Node_acc *list, char *str) +{ + struct Node_acc *temp; + + if(list == NULL) printf("ERROR printNode_acc: list is NULL\n"); + if(str) printf(" %s \n", str); + temp = list; + while(temp) { + if(temp->initialized ==0) break; + printf(" (x, y, z, interset, inbound, isInside) = (%19.15f,%19.15f,%19.15f,%d,%d,%d)\n", + temp->x, temp->y, temp->z, temp->intersect, temp->inbound, temp->isInside); + temp = temp->Next_acc; + } + printf("\n"); +} + +int intersectInList_acc(struct Node_acc *list, double x, double y, double z) +{ + struct Node_acc *temp; + int found=0; + + temp = list; + found = 0; + while ( temp ) { + if( temp->x == x && temp->y == y && temp->z == z ) { + found = 1; + break; + } + temp=temp->Next_acc; + } + if (!found) printf("ERROR intersectInList: point (x,y,z) is not found in the list\n"); + if( temp->intersect == 2 ) + return 1; + else + return 0; + +} + + +/* The following insert a intersection after non-intersect point (x2,y2,z2), if the point + after (x2,y2,z2) is an intersection, if u is greater than the u value of the intersection, + insert after, otherwise insert before +*/ +void insertIntersect_acc(struct Node_acc *list, double x, double y, double z, double u1, double u2, int inbound, + double x2, double y2, double z2) +{ + struct Node_acc *temp1=NULL, *temp2=NULL; + struct Node_acc *temp; + double u_cur; + int found=0; + + temp1 = list; + found = 0; + while ( temp1 ) { + if( temp1->x == x2 && temp1->y == y2 && temp1->z == z2 ) { + found = 1; + break; + } + temp1=temp1->Next_acc; + } + if (!found) printf("ERROR inserAfter: point (x,y,z) is not found in the list\n"); + + /* when u = 0 or u = 1, set the grid point to be the intersection point to solve truncation error isuse */ + u_cur = u1; + if(u1 == 1) { + u_cur = 0; + temp1 = temp1->Next_acc; + if(!temp1) temp1 = list; + } + if(u_cur==0) { + temp1->intersect = 2; + temp1->isInside = 1; + temp1->u = u_cur; + temp1->x = x; + temp1->y = y; + temp1->z = z; + return; + } + + /* when u2 != 0 and u2 !=1, can decide if one end of the point is outside depending on inbound value */ + if(u2 != 0 && u2 != 1) { + if(inbound == 1) { /* goes outside, then temp1->Next_acc is an outside point */ + /* find the next non-intersect point */ + temp2 = temp1->Next_acc; + if(!temp2) temp2 = list; + while(temp2->intersect) { + temp2=temp2->Next_acc; + if(!temp2) temp2 = list; + } + + temp2->isInside = 0; + } + else if(inbound ==2) { /* goes inside, then temp1 is an outside point */ + temp1->isInside = 0; + } + } + + temp2 = temp1->Next_acc; + while ( temp2 ) { + if( temp2->intersect == 1 ) { + if( temp2->u > u_cur ) { + break; + } + } + else + break; + temp1 = temp2; + temp2 = temp2->Next_acc; + } + + /* assign value */ + temp = getNext_acc(); + temp->x = x; + temp->y = y; + temp->z = z; + temp->u = u_cur; + temp->intersect = 1; + temp->inbound = inbound; + temp->isInside = 1; + temp->initialized = 1; + temp1->Next_acc = temp; + temp->Next_acc = temp2; + +} + +double gridArea_acc(struct Node_acc *grid) { + double x[20], y[20], z[20]; + struct Node_acc *temp=NULL; + double area; + int n; + + temp = grid; + n = 0; + while( temp ) { + x[n] = temp->x; + y[n] = temp->y; + z[n] = temp->z; + n++; + temp = temp->Next_acc; + } + + area = great_circle_area_acc(n, x, y, z); + + return area; + +} + +int isIntersect_acc(struct Node_acc node) { + + return node.intersect; + +} + + +int getInbound_acc( struct Node_acc node ) +{ + return node.inbound; +} + +struct Node_acc *getLast_acc(struct Node_acc *list) +{ + struct Node_acc *temp1; + + temp1 = list; + if( temp1 ) { + while( temp1->Next_acc ) { + temp1 = temp1->Next_acc; + } + } + + return temp1; +} + + +int getFirstInbound_acc( struct Node_acc *list, struct Node_acc *nodeOut) +{ + struct Node_acc *temp=NULL; + + temp=list; + + while(temp) { + if( temp->inbound == 2 ) { + copyNode_acc(nodeOut, *temp); + return 1; + } + temp=temp->Next_acc; + } + + return 0; +} + +void getCoordinate_acc(struct Node_acc node, double *x, double *y, double *z) +{ + + + *x = node.x; + *y = node.y; + *z = node.z; + +} + +void getCoordinates_acc(struct Node_acc *node, double *p) +{ + + + p[0] = node->x; + p[1] = node->y; + p[2] = node->z; + +} + +void setCoordinate_acc(struct Node_acc *node, double x, double y, double z) +{ + + + node->x = x; + node->y = y; + node->z = z; + +} + +/* set inbound value for the points in interList that has inbound =0, + this will also set some inbound value of the points in list1 +*/ + +void setInbound_acc(struct Node_acc *interList, struct Node_acc *list) +{ + + struct Node_acc *temp1=NULL, *temp=NULL; + struct Node_acc *temp1_prev=NULL, *temp1_next=NULL; + int prev_is_inside, next_is_inside; + + /* for each point in interList, search through list to decide the inbound value the interList point */ + /* For each inbound point, the prev node should be outside and the next is inside. */ + if(length_acc(interList) == 0) return; + + temp = interList; + + while(temp) { + if( !temp->inbound) { + /* search in grid1 to find the prev and next point of temp, when prev point is outside and next point is inside + inbound = 2, else inbound = 1*/ + temp1 = list; + temp1_prev = NULL; + temp1_next = NULL; + while(temp1) { + if(sameNode_acc(*temp1, *temp)) { + if(!temp1_prev) temp1_prev = getLast_acc(list); + temp1_next = temp1->Next_acc; + if(!temp1_next) temp1_next = list; + break; + } + temp1_prev = temp1; + temp1 = temp1->Next_acc; + } + if(!temp1_next) printf("ERROR from create_xgrid.c: temp is not in list1\n"); + if( temp1_prev->isInside == 0 && temp1_next->isInside == 1) + temp->inbound = 2; /* go inside */ + else + temp->inbound = 1; + } + temp=temp->Next_acc; + } +} + +int isInside_acc(struct Node_acc *node) { + + if(node->isInside == -1) printf("ERROR from mosaic_util.c: node->isInside is not set\n"); + return(node->isInside); + +} + +/* #define debug_test_create_xgrid */ + +/* check if node is inside polygon list or not */ + int insidePolygon_acc( struct Node_acc *node, struct Node_acc *list) +{ + int i, ip, is_inside; + double pnt0[3], pnt1[3], pnt2[3]; + double anglesum; + struct Node_acc *p1=NULL, *p2=NULL; + + anglesum = 0; + + pnt0[0] = node->x; + pnt0[1] = node->y; + pnt0[2] = node->z; + + p1 = list; + p2 = list->Next_acc; + is_inside = 0; + + + while(p1) { + pnt1[0] = p1->x; + pnt1[1] = p1->y; + pnt1[2] = p1->z; + pnt2[0] = p2->x; + pnt2[1] = p2->y; + pnt2[2] = p2->z; + if(samePoint_acc(pnt0[0], pnt0[1], pnt0[2], pnt1[0], pnt1[1], pnt1[2])) return 1; + anglesum += spherical_angle_acc(pnt0, pnt2, pnt1); + p1 = p1->Next_acc; + p2 = p2->Next_acc; + if(p2==NULL)p2 = list; + } + + if( fabs(anglesum - 2*M_PI) < EPSLN8 ) + is_inside = 1; + else + is_inside = 0; + + return is_inside; + +} + +int inside_a_polygon_acc(double *lon1, double *lat1, int *npts, double *lon2, double *lat2) +{ + + double x2[20], y2[20], z2[20]; + double x1, y1, z1; + double min_x2, max_x2, min_y2, max_y2, min_z2, max_z2; + int isinside, i; + + struct Node_acc *grid1=NULL, *grid2=NULL; + + /* first convert to cartesian grid */ + latlon2xyz_acc(*npts, lon2, lat2, x2, y2, z2); + latlon2xyz_acc(1, lon1, lat1, &x1, &y1, &z1); + + max_x2 = maxval_double_acc(*npts, x2); + if(x1 >= max_x2+RANGE_CHECK_CRITERIA) return 0; + min_x2 = minval_double_acc(*npts, x2); + if(min_x2 >= x1+RANGE_CHECK_CRITERIA) return 0; + + max_y2 = maxval_double_acc(*npts, y2); + if(y1 >= max_y2+RANGE_CHECK_CRITERIA) return 0; + min_y2 = minval_double_acc(*npts, y2); + if(min_y2 >= y1+RANGE_CHECK_CRITERIA) return 0; + + max_z2 = maxval_double_acc(*npts, z2); + if(z1 >= max_z2+RANGE_CHECK_CRITERIA) return 0; + min_z2 = minval_double_acc(*npts, z2); + if(min_z2 >= z1+RANGE_CHECK_CRITERIA) return 0; + + + /* add x2,y2,z2 to a Node_acc */ + rewindList_acc(); + grid1 = getNext_acc(); + grid2 = getNext_acc(); + + addEnd_acc(grid1, x1, y1, z1, 0, 0, 0, -1); + for(i=0; i<*npts; i++) addEnd_acc(grid2, x2[i], y2[i], z2[i], 0, 0, 0, -1); + + isinside = insidePolygon_acc(grid1, grid2); + + return isinside; + +} + +/* + is_near_pole() reuturns 1 if a polygon has any point with a latitude + within a threshold from the CGS poles (i.e. near +- Pi/2). + Otherwise returns 0. +*/ +int is_near_pole_acc(const double y[], int n) { + int pole = 0; + for (int i = 0; i < n; i++) { + if ((fabs(y[i]) + from_pole_threshold_rad_acc) > M_PI_2 ) { + pole = 1; + break; + } + } + return pole; +} + +/* + crosses_pole() returns 1 iff one line segment of the polygon has its end at opposit + sides of a pole. i.e. if the longitudes are seperated by about Pi. Note, for realistic + data (not huge polygons), if crosses_pole() reutrns 1, so should is_near_pole(). +*/ +int crosses_pole_acc(const double x[] , int n) { + int has_cl = 0; + for (int i = 0; i < n; i++) { + int im = (i + n - 1) % n; + //int ip = (i + 1) % n; + double dx = x[i] - x[im]; + if (fabs(dx + M_PI) < SMALL_VALUE || fabs(dx - M_PI) < SMALL_VALUE) { + has_cl = 1; + break; + } + } + return has_cl; +} + +/* + Set the_rotation_matrix_acc global variable. + The rotation is 45 degrees and about the vector with orign at + earths center and the direction <0,1,1>/SQRT(2). I.e. a big + rotation away from the pole if what is being rotaed is near a pole. + For rotation matricies formulas and examples, see F.S.Hill, Computer + Graphics Using OpenGL, @nd ed., Chapter 5.3. +*/ +void set_the_rotation_matrix_acc() { + double is2 = 1.0 /M_SQRT2; + + double m00 = 0; + double m01 = - is2; + double m02 = is2; + double m11 = 1.0/2; + double m12 = 0.5; + + double m[3][3] = { {m00, m01, m02}, {m02, m11, m12},{m01, m12, m11} }; + + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { + the_rotation_matrix_acc[i][j] = m[i][j]; + } + } +#pragma acc data update device(the_rotation_matrix_acc[:3][:3]) +} + +/* Rotate point given the passed in rotation matrix */ +void rotate_point_acc(double rv[], double rmat [][3]) { + double v[3]; + + for (int i = 0; i < 3; i++) { + v[i] = 0.0; + for (int j = 0; j < 3; j++) { + v[i] += rmat[i][j] * rv[j]; + } + } + for (int i = 0; i < 3; i++) { + rv[i] = v[i]; + } +} + +/* + Rotate polygon defined by x[], y[] points and store in xr[], yr[]*/ +void rotate_poly_acc(const double x[], const double y[], const int n, double xr[], double yr[]) { + double sv[2]; //a rotated lat/lon + double rv[3]; //rotated xyz point + for (int i = 0; i < n; i++) { + latlon2xyz_acc(1, &x[i], &y[i], &rv[0], &rv[1], &rv[2]); + rotate_point_acc(rv, the_rotation_matrix_acc); + xyz2latlon_acc(1, &rv[0], &rv[1], &rv[2], &sv[0], &sv[1]); + xr[i] = sv[0]; + yr[i] = sv[1]; + } +} + +void pimod_acc(double x[],int nn) +{ + for (int i=0;i M_PI) x[i] -= TPI; + } +} + +/******************************************************************************* + int inside_edge(double x0, double y0, double x1, double y1, double x, double y) + determine a point(x,y) is inside or outside a given edge with vertex, + (x0,y0) and (x1,y1). return 1 if inside and 0 if outside. is + the outward edge normal from vertex to . is the vector + from to . + if Inner produce * > 0, outside, otherwise inside. + inner product value = 0 also treate as inside. +*******************************************************************************/ +int inside_edge_acc(double x0, double y0, double x1, double y1, double x, double y) +{ + const double SMALL = 1.e-12; + double product; + + product = ( x-x0 )*(y1-y0) + (x0-x1)*(y-y0); + return (product<=SMALL) ? 1:0; + + }; /* inside_edge */ + +/* Intersects between the line a and the seqment s + where both line and segment are great circle lines on the sphere represented by + 3D cartesian points. + [sin sout] are the ends of a line segment + returns true if the lines could be intersected, false otherwise. + inbound means the direction of (a1,a2) go inside or outside of (q1,q2,q3) +*/ +int line_intersect_2D_3D_acc(double *a1, double *a2, double *q1, double *q2, double *q3, + double *intersect, double *u_a, double *u_q, int *inbound){ + + /* Do this intersection by reprsenting the line a1 to a2 as a plane through the + two line points and the origin of the sphere (0,0,0). This is the + definition of a great circle arc. + */ + double plane[9]; + double plane_p[2]; + double u; + double p1[3], v1[3], v2[3]; + double c1[3], c2[3], c3[3]; + double coincident, sense, norm; + int i; + int is_inter1, is_inter2; + + *inbound = 0; + + /* first check if any vertices are the same */ + if(samePoint_acc(a1[0], a1[1], a1[2], q1[0], q1[1], q1[2])) { + *u_a = 0; + *u_q = 0; + intersect[0] = a1[0]; + intersect[1] = a1[1]; + intersect[2] = a1[2]; + return 1; + } + else if (samePoint_acc(a1[0], a1[1], a1[2], q2[0], q2[1], q2[2])) { + *u_a = 0; + *u_q = 1; + intersect[0] = a1[0]; + intersect[1] = a1[1]; + intersect[2] = a1[2]; + return 1; + } + else if(samePoint_acc(a2[0], a2[1], a2[2], q1[0], q1[1], q1[2])) { + *u_a = 1; + *u_q = 0; + intersect[0] = a2[0]; + intersect[1] = a2[1]; + intersect[2] = a2[2]; + return 1; + } + else if (samePoint_acc(a2[0], a2[1], a2[2], q2[0], q2[1], q2[2])) { + *u_a = 1; + *u_q = 1; + intersect[0] = a2[0]; + intersect[1] = a2[1]; + intersect[2] = a2[2]; + return 1; + } + + + /* Load points defining plane into variable (these are supposed to be in counterclockwise order) */ + plane[0]=q1[0]; + plane[1]=q1[1]; + plane[2]=q1[2]; + plane[3]=q2[0]; + plane[4]=q2[1]; + plane[5]=q2[2]; + plane[6]=0.0; + plane[7]=0.0; + plane[8]=0.0; + /* Intersect the segment with the plane */ + is_inter1 = intersect_tri_with_line_acc(plane, a1, a2, plane_p, u_a); + + if(!is_inter1) + return 0; + + if(fabs(*u_a) < EPSLN8) *u_a = 0; + if(fabs(*u_a-1) < EPSLN8) *u_a = 1; + + if( (*u_a < 0) || (*u_a > 1) ) return 0; + + /* Load points defining plane into variable (these are supposed to be in counterclockwise order) */ + plane[0]=a1[0]; + plane[1]=a1[1]; + plane[2]=a1[2]; + plane[3]=a2[0]; + plane[4]=a2[1]; + plane[5]=a2[2]; + plane[6]=0.0; + plane[7]=0.0; + plane[8]=0.0; + + /* Intersect the segment with the plane */ + is_inter2 = intersect_tri_with_line_acc(plane, q1, q2, plane_p, u_q); + + if(!is_inter2) + return 0; + + if(fabs(*u_q) < EPSLN8) *u_q = 0; + if(fabs(*u_q-1) < EPSLN8) *u_q = 1; + + if( (*u_q < 0) || (*u_q > 1) ) return 0; + + u =*u_a; + + /* The two planes are coincidental */ + vect_cross_acc(a1, a2, c1); + vect_cross_acc(q1, q2, c2); + vect_cross_acc(c1, c2, c3); + coincident = metric_acc(c3); + + if(fabs(coincident) < EPSLN30) return 0; + + /* Calculate point of intersection */ + intersect[0]=a1[0] + u*(a2[0]-a1[0]); + intersect[1]=a1[1] + u*(a2[1]-a1[1]); + intersect[2]=a1[2] + u*(a2[2]-a1[2]); + + norm = metric_acc( intersect ); + for(i = 0; i < 3; i ++) intersect[i] /= norm; + + /* when u_q =0 or u_q =1, the following could not decide the inbound value */ + if(*u_q != 0 && *u_q != 1){ + p1[0] = a2[0]-a1[0]; + p1[1] = a2[1]-a1[1]; + p1[2] = a2[2]-a1[2]; + v1[0] = q2[0]-q1[0]; + v1[1] = q2[1]-q1[1]; + v1[2] = q2[2]-q1[2]; + v2[0] = q3[0]-q2[0]; + v2[1] = q3[1]-q2[1]; + v2[2] = q3[2]-q2[2]; + + vect_cross_acc(v1, v2, c1); + vect_cross_acc(v1, p1, c2); + + sense = dot_acc(c1, c2); + *inbound = 1; + if(sense > 0) *inbound = 2; /* v1 going into v2 in CCW sense */ + } + + return 1; +} diff --git a/tools/libfrencutils_acc/general_utils_acc.h b/tools/libfrencutils_acc/general_utils_acc.h new file mode 100644 index 00000000..abb0e4ea --- /dev/null +++ b/tools/libfrencutils_acc/general_utils_acc.h @@ -0,0 +1,203 @@ +/*********************************************************************** + * GNU Lesser General Public License + * + * This file is part of the GFDL FRE NetCDF tools package (FRE-NCTools). + * + * FRE-NCtools is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or (at + * your option) any later version. + * + * FRE-NCtools is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FRE-NCTools. If not, see + * . + **********************************************************************/ + +#ifndef _GENERAL_UTILS_H +#define _GENERAL_UTILS_H 1 +#endif + +#define min(a,b) (ab ? a:b) + +struct Node_acc{ + double x, y, z, u, u_clip; + int intersect; /* indicate if this point is an intersection, 0 = no, 1= yes, 2=both intersect and vertices */ + int inbound; /* -1 uninitialized, 0 coincident, 1 outbound, 2 inbound */ + int initialized; /* = 0 means empty list */ + int isInside; /* = 1 means one point is inside the other polygon, 0 is not, -1 undecided. */ + int subj_index; /* the index of subject point that an intersection follow. */ + int clip_index; /* the index of clip point that an intersection follow */ + struct Node_acc *Next_acc; +}; + +#pragma acc routine seq +int nearest_index_acc(double value, const double *array, int ia); + +#pragma acc routine seq +int lon_fix_acc(double *x, double *y, int n_in, double tlon); + +#pragma acc routine seq +double minval_double_acc(int size, const double *data); + +#pragma acc routine seq +double maxval_double_acc(int size, const double *data); + +#pragma acc routine seq +double avgval_double_acc(int size, const double *data); + +#pragma acc routine seq +void latlon2xyz_acc(int size, const double *lon, const double *lat, double *x, double *y, double *z); + +#pragma acc routine seq +void xyz2latlon_acc(int size, const double *x, const double *y, const double *z, double *lon, double *lat); + +#pragma acc routine seq +double poly_area_acc(const double lon[], const double lat[], int n); + +#pragma acc routine seq +int fix_lon_acc(double lon[], double lat[], int n, double tlon); + +#pragma acc routine seq +double spherical_angle_acc(const double *v1, const double *v2, const double *v3); + +#pragma acc routine seq +void vect_cross_acc(const double *p1, const double *p2, double *e ); + +#pragma acc routine seq +double dot_acc(const double *p1, const double *p2); + +#pragma acc routine seq +double metric_acc(const double *p) ; + +#pragma acc routine seq +int intersect_tri_with_line_acc(const double *plane, const double *l1, const double *l2, double *p, double *t); + +#pragma acc routine seq +void mult_acc(double m[], double v[], double out_v[]); + +#pragma acc routine seq +int invert_matrix_3x3_acc(double m[], double m_inv[]); + +#pragma acc routine seq +double great_circle_area_acc(int n, const double *x, const double *y, const double *z); + +#pragma acc routine seq +int insidePolygon_acc(struct Node_acc *node, struct Node_acc *list ); + +#pragma acc routine seq +int inside_a_polygon_acc( double *lon1, double *lat1, int *npts, double *lon2, double *lat2); + +#pragma acc routine seq +void rewindList_acc(void); + +#pragma acc routine seq +struct Node_acc *getNext_acc(); + +#pragma acc routine seq +void initNode_acc(struct Node_acc *node); + +#pragma acc routine seq +void addEnd_acc(struct Node_acc *list, double x, double y, double z, int intersect, double u, int inbound, int inside); + +#pragma acc routine seq +int addIntersect_acc(struct Node_acc *list, double x, double y, double z, int intersect, double u1, double u2, + int inbound, int is1, int ie1, int is2, int ie2); + +#pragma acc routine seq +void insertIntersect_acc(struct Node_acc *list, double x, double y, double z, double u1, double u2, int inbound, + double x2, double y2, double z2); + +#pragma acc routine seq +int length_acc(struct Node_acc *list); + +#pragma acc routine seq +int samePoint_acc(double x1, double y1, double z1, double x2, double y2, double z2); + +#pragma acc routine seq +int sameNode_acc(struct Node_acc node1, struct Node_acc node2); + +#pragma acc routine seq +void addNode_acc(struct Node_acc *list, struct Node_acc nodeIn); + +#pragma acc routine seq +struct Node_acc *getNode_acc(struct Node_acc *list, struct Node_acc inNode_acc); + +#pragma acc routine seq +struct Node_acc *getNextNode_acc(struct Node_acc *list); + +#pragma acc routine seq +void copyNode_acc(struct Node_acc *node_out, struct Node_acc node_in); + +#pragma acc routine seq +void printNode_acc(struct Node_acc *list, char *str); + +#pragma acc routine seq +int intersectInList_acc(struct Node_acc *list, double x, double y, double z); + +#pragma acc routine seq +void insertAfter_acc(struct Node_acc *list, double x, double y, double z, int intersect, double u, int inbound, + double x2, double y2, double z2); + +#pragma acc routine seq +double gridArea_acc(struct Node_acc *grid); + +#pragma acc routine seq +int isIntersect_acc(struct Node_acc node); + +#pragma acc routine seq +int getInbound_acc( struct Node_acc node ); + +#pragma acc routine seq +struct Node_acc *getLast_acc(struct Node_acc *list); + +#pragma acc routine seq +int getFirstInbound_acc( struct Node_acc *list, struct Node_acc *nodeOut); + +#pragma acc routine seq +void getCoordinate_acc(struct Node_acc node, double *x, double *y, double *z); + +#pragma acc routine seq +void getCoordinates_acc(struct Node_acc *node, double *p); + +#pragma acc routine seq +void setCoordinate_acc(struct Node_acc *node, double x, double y, double z); + +#pragma acc routine seq +void setInbound_acc(struct Node_acc *interList, struct Node_acc *list); + +#pragma acc routine seq +int isInside_acc(struct Node_acc *node); + +#pragma acc routine seq +void set_rotate_poly_true_acc(void); + +#pragma acc routine seq +int is_near_pole_acc(const double y[], int n); + +#pragma acc routine seq +int crosses_pole_acc(const double x[], int n); + +#pragma acc routine seq +void rotate_point_acc( double rv[], double rmat [][3]); + +#pragma acc routine seq +void rotate_poly_acc(const double x[], const double y[], const int n, double xr[], double yr[]); + +#pragma acc routine seq +void set_the_rotation_matrix_acc(); + +#pragma acc routine seq +void pimod_acc(double x[],int nn); + +#pragma acc routine seq +int inside_edge_acc(double x0, double y0, double x1, double y1, double x, double y); + +#pragma acc routine seq +int line_intersect_2D_3D_acc(double *a1, double *a2, double *q1, double *q2, double *q3, + double *intersect, double *u_a, double *u_q, int *inbound); diff --git a/tools/libfrencutils_acc/globals_acc.h b/tools/libfrencutils_acc/globals_acc.h new file mode 100644 index 00000000..f72d71a1 --- /dev/null +++ b/tools/libfrencutils_acc/globals_acc.h @@ -0,0 +1,69 @@ +/*********************************************************************** + * GNU Lesser General Public License + * + * This file is part of the GFDL FRE NetCDF tools package (FRE-NCTools). + * + * FRE-NCtools is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or (at + * your option) any later version. + * + * FRE-NCtools is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FRE-NCTools. If not, see + * . + **********************************************************************/ +#ifndef GLOBALS_ACC_H_ +#define GLOBALS_ACC_H_ +#include "globals.h" +#include "parameters.h" + +typedef struct { + int nxcells; + int *input_parent_cell_index; + int *output_parent_cell_index; + double *xcell_area; + double *dcentroid_lon; + double *dcentroid_lat; +} Interp_per_input_tile; + +typedef struct { + size_t nxcells; + int *input_parent_lon_index; + int *input_parent_lat_index; + int *output_parent_lon_index; + int *output_parent_lat_index; + int *output_parent; + int *input_parent_tile; + double *dcentroid_lon; + double *dcentroid_lat; + Interp_per_input_tile *input_tile; + double *xcell_area; + double *weight; + int *index; + char remap_file[STRING]; + int file_exist; +} Interp_config_acc; + +typedef struct { + double *lon_min; + double *lon_max; + double *lat_min; + double *lat_max; + double *lon_cent; + double *area; + int *nvertices; + double **lon_vertices; + double **lat_vertices; + double *recomputed_area; + double *centroid_lon; + double *centroid_lat; + double *skip_cells; + +} Grid_cells_struct_config; + +#endif diff --git a/tools/libfrencutils_acc/parameters.h b/tools/libfrencutils_acc/parameters.h new file mode 100644 index 00000000..0c10ef2b --- /dev/null +++ b/tools/libfrencutils_acc/parameters.h @@ -0,0 +1,46 @@ +/*********************************************************************** + * GNU Lesser General Public License + * + * This file is part of the GFDL FRE NetCDF tools package (FRE-NCTools). + * + * FRE-NCtools is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or (at + * your option) any later version. + * + * FRE-NCtools is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FRE-NCTools. If not, see + * . + **********************************************************************/ + +#define AREA_RATIO_THRESH (1.e-6) +#define MASK_THRESH (0.5) +#define MAX_V 8 + +#define AREA_RATIO (1.e-3) +#define MAXVAL (1.e20) +#define TOLERANCE (1.e-10) + +#define MAXXGRID 1e6 + +#define MV 50 + +#define TOLERENCE (1.e-6) +#define EPSLN8 (1.e-8) +#define EPSLN10 (1.e-10) +#define EPSLN15 (1.e-15) +#define EPSLN30 (1.e-30) +#define SMALL_VALUE ( 1.e-10 ) + +#ifndef RANGE_CHECK_CRITERIA +#define RANGE_CHECK_CRITERIA 0.05 +#endif + +#ifndef MAXNODELIST +#define MAXNODELIST 100 +#endif