diff --git a/.gitignore b/.gitignore
index 38cfeac..6c514aa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -55,4 +55,6 @@ tests/data/gridding/meteo_out/**/*.nc
tests/data/gridding/meteo_out/**/*.tiff
.settings
.project
-.pydevproject
\ No newline at end of file
+.pydevproject
+
+.ipynb_checkpoints/
\ No newline at end of file
diff --git a/README.md b/README.md
index 727273e..10b2875 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Lisflood Utilities
+# Lisflood Utilities

This repository hosts source code of LISFLOOD utilities. Go to [Lisflood OS page](https://ec-jrc.github.io/lisflood/) for more information.

@@ -17,36 +17,38 @@ Other useful resources

## Intro

-Lisflood Utilities is a set of tools to help LISFLOOD users (or any users of PCRaster/netCDF files)
+Lisflood Utilities is a set of tools to help LISFLOOD users (or any users of PCRaster/NetCDF files)
to execute some mundane tasks that are necessary to operate lisflood. Here's a list of utilities you can find in lisflood-utilities package.

-* __pcr2nc__ is a tool to convert PCRaster maps to netCDF4 files.
+* __[pcr2nc](#pcr2nc)__ is a tool to convert PCRaster maps to NetCDF4 files.
  - convert a single map into a NetCDF4 file
  - convert a time series of maps into a NetCDF4 mapstack
  - support for WGS84 and ETRS89 (LAEA) reference systems
  - fine tuning of output files (compression, significant digits, etc.)

-* __nc2pcr__ is a tool to convert a netCDF file into PCRaster maps.
+* __[nc2pcr](#nc2pcr)__ is a tool to convert a NetCDF file into PCRaster maps.
  - convert 2D variables in single PCRaster maps
-  - netCDF4 mapstacks are not supported yet
+  - NetCDF4 mapstacks are not supported yet

-* __cutmaps__ is a tool to cut netcdf files in order to reduce size, using either
+* __[cutmaps](#cutmaps)__ is a tool to cut NetCDF files in order to reduce their size, using either
  - a bounding box of coordinates
  - a bounding box of matrix indices
  - an existing boolean area mask
-  - a list of stations and a LDD (in netCDF or PCRaster format) **Note: PCRaster must be installed in the conda env**
+  - a list of stations and a LDD ("local drain direction", in NetCDF or PCRaster format)
+
+> **Note**: PCRaster must be installed in the Conda environment.

-* __thresholds__ is a tool to compute the discharge return period thresholds from netCDF4 file containing a discharge time series.
+* __[compare](#compare)__ is a package containing a set of simple Python classes that helps to compare
NetCDF, PCRaster and TSS files.

-* __compare__ is a package containing a set of simple Python classes that helps to compare
netCDF, PCRaster and TSS files.
+* __[thresholds](#thresholds)__ is a tool to compute the discharge return period thresholds from a NetCDF4 file containing a discharge time series.

-* __water-demand-historic__ is a package allowing to generate sectoral (domestic, livestock, industry, and thermoelectric) water demand maps with monthly to yearly temporal steps for a range of past years, at the users’ defined spatial resolution and geographical extent. These maps are required by the LISFLOOD OS [water use module](https://ec-jrc.github.io/lisflood-model/2_18_stdLISFLOOD_water-use)
+* __[water-demand-historic](#water-demand-historic)__ is a package that generates sectoral (domestic, livestock, industry, and thermoelectric) water demand maps with monthly to yearly temporal steps for a range of past years, at the user-defined spatial resolution and geographical extent.
These maps are required by the LISFLOOD OS [water use module](https://ec-jrc.github.io/lisflood-model/2_18_stdLISFLOOD_water-use)

-* __waterregions__ is a package containing two scripts that allow to create and verify a water regions map, respectively.
+* __[waterregions](#waterregions)__ is a package containing two scripts to create and to verify a water regions map, respectively.

-* __gridding__ is a tool to interpolate meteo variables observations stored in text files containing (lon, lat, value) into grids.
+* __[gridding](#gridding)__ is a tool to interpolate meteorological variable observations stored in text files containing (lon, lat, value) into grids.
  - uses inverse distance interpolation
  - input file names must use format: \YYYYMMDDHHMI_YYYYMMDDHHMISS.txt
  - option to store all interpolated grids in a single NetCDF4 file
@@ -55,7 +57,11 @@ netCDF, PCRaster and TSS files.
  - grids are setup in the configuration folder and are defined by a dem.nc file
  - meteo variables parameters are defined in the same configuration folder

-* __cddmap__ is a tool to generate correlation decay distance (CDD) maps starting from station timeseries
+* __[cddmap](#cddmap)__ is a tool to generate correlation decay distance (CDD) maps starting from station timeseries

+* __[ncextract](#ncextract)__ is a tool to extract values from NetCDF4 (or GRIB) file(s) at specific coordinates.
+
+* __[catchstats](#catchstats)__ calculates catchment statistics (mean, sum, std, min, max...) from NetCDF4 files given masks created with [`cutmaps`](#cutmaps).

The package contains convenient classes for reading/writing:

* PCRasterMap
* PCRasterReader
* NetCDFMap
* NetCDFWriter

-* __ncextract__ is a tool to extract values from netCDF4 file at specific coordinates.
-
### Installation

#### Requisites
@@ -75,7 +79,7 @@ Otherwise, ensure you have properly installed the following software:

- Python 3.5+
- GDAL C library and software
-- netCDF4 C library
+- NetCDF4 C library

#### Install
If you use conda, create a new env (or use an existing one) and install gdal and lisflood-utilities:
@@ -107,7 +111,7 @@ gdal-config --version # 3.0.1
pip install GDAL==3.0.1
```

-Note: if you previously installed an older version of the lisflood-utilitiies, it is highly recommended to remove it before installing the newest version:
+Note: if you previously installed an older version of the lisflood-utilities, it is highly recommended to remove it before installing the newest version:

```bash
pip uninstall lisflood-utilities
@@ -224,7 +228,7 @@ In this section you have to configure `units` and `calendar`.

## nc2pcr

-This tool converts single maps netCDF (time dimension is not supported yet) to PCRaster format.
+This tool converts single-map NetCDF files (time dimension is not supported yet) to PCRaster format.

### Usage

@@ -238,90 +242,103 @@ If input file is a LDD map, you must add the `-l` flag:
nc2pcr -i /path/to/input/ldd.nc -o /path/to/output/ldd.map -l [-c /path/to/clone.map optional]
```

-## Cutmaps: a NetCDF files cookie-cutter
+## cutmaps

-This tool cut netcdf files, using a mask, a bounding box or a list of stations along with a LDD map.
+This tool cuts NetCDF files using either a mask, a bounding box, or a list of stations along with a LDD (local drain direction) map.
-### Usage:
-The tool accepts as input:
+### Usage

-* a mask map (either PCRaster or netCDF format) using the -m argument or
-  - alternatively, using the -i argument, matrix indices in the form `imin imax jmin jmax` (imin, imax, jmin, jmax must be integer numbers)
-  - alternatively, using the -c argument, coordinates bounding box in the form `xmin xmax ymin ymax` (xmin, xmax, ymin, ymax can be integer or floating point numbers; x = longitude, y = latitude)
-  - alternatively, using the -N and -l arguments, list of stations with coordinates and a LDD map.
-* a path to a netCDF file (-F argument), a folder containing netCDF files to cut (-f argument) or a static dataset path (-S argument) like LISFLOOD static files.
-* a path to a folder where to write cut files (-o argument).
+The tool requires a series of arguments:

-The following command will cut all netcdf files inside _/workarea/Madeira/lai/_ folder
-and produced files will be writte in current folder.
-The cookie-cutter that will be used is _/workarea/Madeira/maps/MaskMap/Bacia_madeira.nc_.
-This file is a mask (boolean map with 1 only in the area of interest) where cutmaps takes the bounding box from.
-The mask can also be in PCRaster format.
+* The area to be extracted can be defined in one of the following ways:
+  - `-m`, `--mask`: a mask map (either PCRaster or NetCDF format).
+  - `-i`, `--cuts_indices`: a bounding box defined by matrix indices in the form `-i imin imax jmin jmax` (the indices must be integers).
+  - `-c`, `--cuts`: a bounding box defined by coordinates in the form `-c xmin xmax ymin ymax` (the coordinates can be integer or floating point numbers; x = longitude, y = latitude).
+  - `-N`, `--stations`: a list of stations included in a tab-separated text file. This approach requires a LDD (local drain direction) map as an extra input, defined with the argument `-l` (`--ldd`).
+* The files to be cut may be defined in one of the following ways:
+  - `-f`, `--folder`: a folder containing NetCDF files.
+  - `-F`, `--file`: a single NetCDF file to be cut.
+  - `-S`, `--subdir`: a directory containing a number of folders with NetCDF files; the folder structure is preserved in the output.
+* The resulting files will be saved in the folder defined by the argument `-o` (`--outpath`).

-```bash
-cutmaps -m /workarea/Madeira/maps/MaskMap/Bacia_madeira.nc -f /workarea/Madeira/lai/ -o ./
-```
+There are additional optional arguments:
+
+* `-W`, `--overwrite`: allows overwriting existing output files.
+* `-C`, `--clonemap`: defines a clone map to be used when the LDD input map (argument `-l`) is in NetCDF format.

-The following command will cut a single netCDF file and produced file will be writte in current folder.
+#### Examples
+
+**Using a mask**
+
+The following command will cut all NetCDF files inside a specific folder (argument `-f`) using a mask (argument `-m`). The mask is a boolean map (1 only in the area of interest) that `cutmaps` uses to create a bounding box. The resulting files will be written in the current folder (argument `-o`).

```bash
-cutmaps -m /workarea/Madeira/maps/MaskMap/Bacia_madeira.nc -F /workarea/Madeira/lai/tp.nc -o ./
+cutmaps -m /workarea/Madeira/maps/MaskMap/Bacia_madeira.nc -f /workarea/Madeira/lai/ -o ./
```

-**Indices can also be passed as an argument (using -i argument instead of -m). 
Knowing your area of interest from your netCDF files,
-you can determine indices of the array and you can pass in the form `imin imax jmin jmax` (imin, imax, jmin, jmax must be integer numbers).**
+**Using indices**
+
+The following command cuts all the maps in an input folder (argument `-f`) using a bounding box defined by matrix indices (argument `-i`). Knowing your area of interest from your NetCDF files, you can determine indices of the array and pass them in the form `-i imin imax jmin jmax` (integer numbers).

```bash
cutmaps -i "150 350 80 180" -f /workarea/Madeira/lai/ -o ./
```

-**Example with coordinates (using -c argument) `xmin xmax ymin ymax` (xmin, xmax, ymin, ymax can be integer or floating point numbers; x = longitude, y = latitude) and path to EFAS/GloFAS static data (-S option), with -W to allow overwriting existing files in output directory:**
+**Using coordinates**
+
+The following command cuts all the maps in an input directory containing several folders (argument `-S`) using a bounding box defined by coordinates (argument `-c`). The argument `-W` allows overwriting pre-existing files in the output directory (argument `-o`):

```bash
cutmaps -S /home/projects/lisflood-eu -c "4078546.12 4463723.85 811206.57 1587655.50" -o /Work/Tunisia/cutmaps -W
```

-**Example with stations.txt and LDD**
+**Using station coordinates and a local drain direction map**

-Given a LDD map and a list of stations in a text file, each row having coordinates X/Y or lon/lat and an index, separated by tabs:
+The TXT file with stations must have a specific format, as in the example below. Each row represents a station and contains three tab-separated columns that indicate the X and Y coordinates (or lon and lat) and the ID of the station.

```text
-4297500 1572500 1
-4292500 1557500 2
-4237500 1537500 3
-4312500 1482500 4
-4187500 1492500 5
+4297500 1572500 1
+4292500 1557500 2
+4237500 1537500 3
+4312500 1482500 4
+4187500 1492500 5
```

+The following command will cut all the maps in a specific folder (`-f` argument) given a LDD map (`-l` argument) and the previous text file (`-N` argument), and save the results in a folder defined by the argument `-o`.
+
```bash
-cutmaps -S /home/projects/lisflood-eu -l ldd.map -N stations.txt -o /Work/Tunisia/cutmaps
+cutmaps -f /home/projects/lisflood-eu -l ldd.map -N stations.txt -o /Work/Tunisia/cutmaps
```

-If ldd is in netCDF format, LDD will be converted to PCRaster format, first.
+If the LDD is in NetCDF format, it will first be converted into PCRaster format.

```bash
-cutmaps -S /home/projects/lisflood-eu -l ldd.nc -N stations.txt -o /Work/Tunisia/cutmaps
+cutmaps -f /home/projects/lisflood-eu -l ldd.nc -N stations.txt -o /Work/Tunisia/cutmaps
```

-If you experience problems, you can try to pass a path to a PCRaster clone map.
+If you experience problems, you can try to pass a path to a PCRaster clone map using the `-C` argument.

```bash
-cutmaps -S /home/projects/lisflood-eu -l ldd.nc -C area.map -N stations.txt -o /Work/Tunisia/cutmaps
+cutmaps -f /home/projects/lisflood-eu -l ldd.nc -C area.map -N stations.txt -o /Work/Tunisia/cutmaps
```

-You will find the produced mask.map and mask.nc for your area in the same folder of ldd map; you will need it for lisflood/lisvap executions.
-You will also have outlets.map/outlets.nc based on stations.txt, which let you produce gauges TSS if configured in LISFLOOD. 
-## compare utility
+### Output
+
+Apart from the cut files in the output folder specified on the command line, `cutmaps` produces other outputs in the folder where the LDD map is stored:
+
+* _mask.map_ and _mask.nc_ for your area of interest, which may be needed in subsequent LISFLOOD/LISVAP executions.
+* _outlets.map_ and _outlets.nc_ based on _stations.txt_, which will let you produce gauges TSS if configured in LISFLOOD.

-This tool let you compare two netcdf datasets. You can configure it with tolerances (atol, rtol, thresholds for percentage of tolerated different values).
-You can also set the option to write diff files, so that you can inspect maps and differences with a tool like Panoply
+## compare
+
+This tool compares two NetCDF datasets. You can configure it with tolerances (absolute `--atol`, relative `--rtol`, thresholds for percentage of tolerated different values `--max-diff-percentage`). You can also set the option `--save-diffs` to write files with the differences, so that you can inspect maps and differences with tools like [Panoply](https://www.giss.nasa.gov/tools/panoply/).

```text
usage: compare [-h] -a DATASET_A -b DATASET_B -m MASKAREA [-M SUBMASKAREA]
               [-e] [-s] [-D] [-r RTOL] [-t ATOL] [-p MAX_DIFF_PERCENTAGE]
               [-l MAX_LARGEDIFF_PERCENTAGE]

-Compare netCDF outputs: 0.12.12
+Compare NetCDF outputs: 0.12.12

optional arguments:
  -h, --help            show this help message and exit
@@ -333,7 +350,7 @@ optional arguments:
                        path to mask
  -e, --array-equal     flag to compare files to be identical
  -s, --skip-missing    flag to skip missing files in comparison
-  -D, --save-diffs      flag to save diffs in netcdf files for visual
+  -D, --save-diffs      flag to save diffs in NetCDF files for visual
                        comparisons. Files are saved in ./diffs folder of
                        current directory. For each file presenting
                        differences, you will find files diffs, original A and
@@ -408,17 +425,17 @@ This utility allows to create a water region map which is consistent with a set

#### Input
- List of the coordinates of the calibration points. This list must be provided in a .txt file with three columns: LONGITUDE(or x), LATITUDE(or y), point ID.
-- LDD map can be in netcdf format or pcraster format. When using pcraster format, the following condition must be satisfied: *PCRASTER_VALUESCALE=VS_LDD*.
-- Countries map in netcdf format or pcraster format. When using pcraster format, the following condition must be satisfied: *PCRASTER_VALUESCALE=VS_NOMINAL*. This map shows the political boundaries of the Countries, each Coutry is identified by using a unique ID. This map is used to ensure that the water regions are not split accross different Countries.
+- LDD map can be in NetCDF format or PCRaster format. When using PCRaster format, the following condition must be satisfied: *PCRASTER_VALUESCALE=VS_LDD*.
+- Countries map in NetCDF format or PCRaster format. 
When using PCRaster format, the following condition must be satisfied: *PCRASTER_VALUESCALE=VS_NOMINAL*. This map shows the political boundaries of the Countries; each Country is identified by a unique ID. This map is used to ensure that the water regions are not split across different Countries.
+- Map of the initial definition of the water regions in NetCDF format or PCRaster format. When using PCRaster format, the following condition must be satisfied: *PCRASTER_VALUESCALE=VS_NOMINAL*. This map is used to attribute a water region to areas not included in the calibration catchments. In order to create this map, the user can follow the guidelines provided [here](https://ec-jrc.github.io/lisflood-model/2_18_stdLISFLOOD_water-use/).
+- file *.yaml* or *.json* to define the metadata of the output water regions map in NetCDF format. An example of the structure of these files is provided [here](tests/data/waterregions)

##### Input data provided by this utility:
-This utility provides three maps of [Countries IDs](https://github.com/ec-jrc/lisflood-utilities/tree/master/tests/data): 1arcmin map of Europe (EFAS computational domain), 0.1 degree and 3arcmin maps of the Globe . ACKNOWLEDGEMENTS: both the rasters were retrieved by upsampling the original of the World Borders Datase provided by http://thematicmapping.org/ (the dataset is available under a Creative Commons Attribution-Share Alike License).
+This utility provides three maps of [Countries IDs](tests/data/waterregions): a 1arcmin map of Europe (EFAS computational domain), and 0.1 degree and 3arcmin maps of the Globe. ACKNOWLEDGEMENTS: both rasters were retrieved by upsampling the original World Borders Dataset provided by http://thematicmapping.org/ (the dataset is available under a Creative Commons Attribution-Share Alike License).

#### Output
Map of the water regions which is consistent with the calibration catchments. In other words, each water region is entirely included in one calibration catchment. The test to check the consistency between the newly created water regions map and the calibration catchments is implemented internally by the code and the outcome of the test is printed on the screen.
-In the output map, each water region is identified by a unique ID. The format of the output map can be netcdf or pcraster.
+In the output map, each water region is identified by a unique ID. The format of the output map can be NetCDF or PCRaster.

#### Usage
The following command lines allow to produce a water region map which is consistent with the calibration points (only one command line is required: each one of the command lines below shows a different combination of input files format):

@@ -430,9 +447,9 @@ The following command lines allow to produce a water region map which is consist

*python define_waterregions.py -p calib_points_test.txt -l ldd_test.map -C countries_id_test.nc -w waterregions_initial_test.map -o my_new_waterregions.nc -m metadata.test.yaml*
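The calibration points file passed via `-p` is a plain-text table with one row per point and three columns (LONGITUDE or x, LATITUDE or y, point ID). A hypothetical sample, reusing the illustrative station coordinates shown in the `cutmaps` section above:

```text
4297500	1572500	1
4292500	1557500	2
4237500	1537500	3
```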
-The input maps can be in nectdf format or pcraster format (the same command line can accept a mix of pcraster and netcdf formats).It is imperative to write the file name in full, that is including the extension (which can be either ".nc" or ".map").
-The utility can return either a pcraster file or a netcdf file. The users select their preferred format by specifying the extension of the file in the output option (i.e. either ".nc" or ".map").
-The metadata file in .yaml format must be provided only if the output file is in netcdf format.
+The input maps can be in NetCDF format or PCRaster format (the same command line can accept a mix of PCRaster and NetCDF formats). It is imperative to write the file name in full, that is, including the extension (which can be either ".nc" or ".map").
+The utility can return either a PCRaster file or a NetCDF file. Users select their preferred format by specifying the extension of the file in the output option (i.e. either ".nc" or ".map").
+The metadata file in .yaml format must be provided only if the output file is in NetCDF format.
The code internally verifies that each one of the newly created water regions is entirely included within one calibration catchment. If this condition is satisfied, the following message is printed out: *“OK! Each water region is completely included inside one calibration catchment”*. If the condition is not satisfied, the error message is *“ERROR: The water regions WR are included in more than one calibration catchment”*. Moreover, the code provides the list of the water regions WR and the calibration catchments that do not meet the requirement. This error highlights a problem in the input data: the user is recommended to check (and correct) the list of calibration points and the input maps.

@@ -467,8 +484,8 @@ optional arguments:

This function allows to verify the consistency between a water region map and a map of calibration catchments. This function must be used when the water region map and the map of calibration catchments have been defined in an independent manner (i.e. not using the utility **define_waterregions**). The function verify_waterregions verifies that each water region map is entirely included in one calibration catchment. If this condition is not satisfied, an error message is printed on the screen.

#### Input
-- Map of calibration catchments in netcdf format.
-- Water regions map in netcdf format.
+- Map of calibration catchments in NetCDF format.
+- Water regions map in NetCDF format.

#### Output
The output is a message on the screen. There are two options:
@@ -492,13 +509,13 @@ calibration catchments

optional arguments:
  -h, --help            show this help message and exit
  -cc CALIB_CATCHMENTS, --calib_catchments CALIB_CATCHMENTS
-                        map of calibration catchments, netcdf format
+                        map of calibration catchments, NetCDF format
  -wr WATERREGIONS, --waterregions WATERREGIONS
-                        map of water regions, netcdf format
+                        map of water regions, NetCDF format
```

NOTE:
-The utility **pcr2nc** can be used to convert a map in pcraster format into netcdf format.
+The utility **pcr2nc** can be used to convert a map in PCRaster format into NetCDF format.

## gridding

@@ -517,7 +534,7 @@ python3, pyg2p

The tool requires four mandatory command line input arguments:

- -i, --in: Set input folder path with kiwis/point files
-- -o, --out: Set output folder base path for the tiff files or the netCDF file path.
+- -o, --out: Set output folder base path for the tiff files or the NetCDF file path.
- -c, --conf: Set the grid configuration type to use. Right now only 5x5km, 1arcmin are available.
- -v, --var: Set the variable to be processed. Right now only variables pr,pd,tn,tx,ws,rg,pr6,ta6 are available.

@@ -525,7 +542,7 @@ The input folder must contain the meteo observation in text files with file name
The files must contain the columns longitude, latitude and observation_value, separated by TABs and without a header. 
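A hypothetical input file would therefore look like the sample below (the coordinates and values are purely illustrative, not taken from a real dataset):

```text
-3.7038	40.4168	12.4
-6.2885	36.5271	8.9
2.1686	41.3874	0.0
```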
Not mandatory, but it could help to store the files in a folder structure like: ./YYYY/MM/DD/\YYYYMMDDHHMI_YYYYMMDDHHMISS.txt

-Example of command that will generate a netCDF file containing the precipitation (pr) grids for March 2023:
+Example of a command that generates a NetCDF file containing the precipitation (pr) grids for March 2023:

```bash
gridding -i /meteo/pr/2023/ -o /meteo/pr/pr_MARCH_2023.nc -c 1arcmin -v pr -s 202303010600 -e 202304010600
```

```bash
gridding --help
```

```text
-usage: gridding [-h] -i input_folder -o {output_folder, netcdf_file} -c
+usage: gridding [-h] -i input_folder -o {output_folder, NetCDF_file} -c
                {5x5km, 1arcmin,...} -v {pr,pd,tn,tx,ws,rg,...}
                [-d files2process.txt] [-s YYYYMMDDHHMISS] [-e YYYYMMDDHHMISS]
                [-q] [-t] [-f]

version v0.1 ($Mar 28, 2023 16:01:00$)

This script interpolates meteo input variables data into either a single NETCDF4 file or one GEOTIFF file per
-timestep. The resulting netCDF is CF-1.6 compliant.
+timestep. The resulting NetCDF is CF-1.6 compliant.

optional arguments:
  -h, --help            show this help message and exit
  -i input_folder, --in input_folder
                        Set input folder path with kiwis/point files
-  -o {output_folder, netcdf_file}, --out {output_folder, netcdf_file}
+  -o {output_folder, NetCDF_file}, --out {output_folder, NetCDF_file}
                        Set output folder base path for the tiff files or the
-                        netCDF file path.
+                        NetCDF file path.
  -c {5x5km, 1arcmin,...}, --conf {5x5km, 1arcmin,...}
                        Set the grid configuration type to use.
  -v {pr,pd,tn,tx,ws,rg,...}, --var {pr,pd,tn,tx,ws,rg,...}
@@ -571,9 +588,9 @@ optional arguments:
                        [default: 20230421060000]
  -q, --quiet           Set script output into quiet mode [default: False]
  -t, --tiff            Outputs a tiff file per timestep instead of the
-                        default single netCDF [default: False]
+                        default single NetCDF [default: False]
  -f, --force           Force write to existing file. TIFF files will be
-                        overwritten and netCDF file will be appended.
+                        overwritten and NetCDF file will be appended.
                        [default: False]
```

@@ -611,11 +628,20 @@

cddmap /meteo/pr --parallel --maxdistance 500

## ncextract

-The ncextract tool extracts the time series of values from (multiple) netCDF file(s) at user defined coordinates.
+The `ncextract` tool extracts time series from (multiple) NetCDF or GRIB file(s) at user-defined coordinates.

-### Usage:
-The tool takes as input a CSV file containing point coordinates (structured in 3 columns: id, lat, lon) and a directory containing one or more netCDF files.
-The output is a CSV file (or optionally a netCDF file) containing the values at the points corresponding to the provided coordinates, in chronological order.
+### Usage
+
+The tool takes as input a CSV file containing point coordinates and a directory containing one or more NetCDF or GRIB files. The CSV file must contain exactly three columns: a point identifier and its two coordinates. The names of the coordinate fields must match those in the NetCDF or GRIB files. For example:
+
+```text
+ID,lat,lon
+0010,40.6083,-4.2250
+0025,37.5250,-6.2750
+0033,40.5257,-6.4753
+```
+
+The output is a file containing the time series at the pixels corresponding to the provided coordinates, in chronological order. The function supports two output formats: CSV or NetCDF.

```text
usage: ncextract.py [-h] -i INPUT -d DIRECTORY -o OUTPUT [-nc]
...
options:
  -h, --help            show this help message and exit
  -i INPUT, --input INPUT
                        Input CSV file (id, lat, lon)
  -d DIRECTORY, --directory DIRECTORY
                        Input directory with .nc files
  -o OUTPUT, --output OUTPUT
-                        Output file (default is CSV, use -nc for NetCDF)
-  -nc, --nc             Output to NetCDF
+                        Output file. 
Two extensions are supported: .csv or .nc
```

+#### Use in the command prompt
+
+The following command extracts the discharge time series from EFAS simulations (NetCDF files in the directory _EFAS5/discharge/maps_) in a series of points where gauging stations are located (file _gauging_stations.csv_), and saves the extraction as a CSV file.
+
+```bash
+ncextract -i ./gauging_stations.csv -d ./EFAS5/discharge/maps/ -o ./EFAS5/discharge/timeseries/results_ncextract.csv
+```
+
+#### Use programmatically
+
+The function can be imported in a Python script. It takes as inputs two `xarray.Dataset` objects: one defining the input maps and the other the points of interest. The result of the extraction can be returned as another `xarray.Dataset`, or saved as a file in either CSV or NetCDF format.
+
+```Python
+from lisfloodutilities.ncextract import extract_timeseries
+
+# load desired input maps and points
+# maps: xarray.Dataset
+# points: xarray.Dataset
+
+# extract time series into an xarray.Dataset
+ds = extract_timeseries(maps, points, output=None)
+```
+
+
+## catchstats
+
+The `catchstats` tool calculates catchment statistics given a set of input NetCDF files and a set of mask NetCDF files.
+
+### Usage
+
+#### In the command prompt
+
+The tool takes as input a directory containing the NetCDF files from which the statistics will be computed, and another directory containing the NetCDF files that define the catchment boundaries, which can be any of the outputs of `cutmaps` (not necessarily the file _my_mask.nc_). The input files can be the LISFLOOD static maps (no temporal dimension) or stacks of maps with a temporal dimension. The mask NetCDF files must be named after the catchment ID, as this name will be used to identify the catchment in the output NetCDF. For instance, _0142.nc_ would correspond to the mask of catchment 142. Optionally, an extra NetCDF file can be passed to the tool to account for varying pixel area; in this case, the statistics will be weighted by this pixel area map.
+
+Only some statistics are currently available: mean, sum, std (standard deviation), var (variance), min, max, median and count. The weighing based on pixel area does not affect the statistics min, max, median or count.
+
+The output is a set of NetCDF files (as many as catchments in the mask directory) containing the resulting statistics.
+
+```text
+usage: catchstats.py [-h] -i INPUT -m MASK -s STATISTIC -o OUTPUT [-a AREA] [-W]
+
+Utility to compute catchment statistics from (multiple) NetCDF files.
+The mask map is a NetCDF file with values in the area of interest and NaN elsewhere.
+The area map is optional and accounts for varying pixel area with latitude.
+
+options:
+  -h, --help
+                        show this help message and exit
+  -i INPUT, --input INPUT
+                        directory containing the input NetCDF files
+  -m MASK, --mask MASK
+                        directory containing the mask NetCDF files
+  -s STATISTIC, --statistic STATISTIC
+                        list of statistics to be computed. Possible values: mean, sum, std, var, min, max, median, count
+  -o OUTPUT, --output OUTPUT
+                        directory where the output NetCDF files will be saved
+  -a AREA, --area AREA
+                        NetCDF file of pixel area used to weigh the statistics
+  -W, --overwrite
+                        overwrite existing output files
+```
+
+**Example**
+
+The following command calculates the average and total precipitation for a set of catchments from the dataset EMO-1. The static map _pixarea.nc_ is used to account for varying pixel area. 
```bash
catchstats -i ./EMO1/pr/ -m ./masks/ -s mean sum -o ./areal_precipitation/ -a ./EFAS5/static_maps/pixarea.nc
```

#### In a Python script

The tool can be imported in a Python script so that the output is kept in memory. This function takes an `xarray.Dataset` with the input maps from which statistics will be computed, a dictionary of `xarray.DataArray` with the catchment masks, and optionally the weighing map. By default, the result is an `xarray.Dataset`; NetCDF files are written instead if a directory is provided in the `output` argument.

```Python
# import function
from lisfloodutilities.catchstats import catchment_statistics

# load desired input maps and catchment masks
# maps: xarray.Dataset
# masks: Dict[int, xarray.DataArray]

# compute statistics and save in a xarray.Dataset
ds = catchment_statistics(maps, masks, statistic=['mean', 'sum'], weight=None, output=None)
```

### Output

The structure of the output depends on whether the input files include a temporal dimension or not:
* If the input files DO NOT have a time dimension, the output has a single dimension: the catchment ID. It contains as many variables as the combinations of input variables and statistics. For instance, if the input variables are "elevation" and "gradient" and three statistics are required ("mean", "max", "min"), the output will contain 6 variables: "elevation_mean", "elevation_max", "elevation_min", "gradient_mean", "gradient_max" and "gradient_min".
* If the input files DO have a time dimension, the output has two dimensions: the catchment ID and time. The number of variables follows the same structure explained in the previous point. For instance, if the input files are daily maps of precipitation (variable name "pr") and we calculate the mean and total precipitation over the catchment, the output will contain two dimensions ("ID", "time") and two variables ("pr_mean", "pr_sum").

## Using `lisfloodutilities` programmatically

You can use lisflood utilities in your Python programs. As an example, the script below creates the mask map for a set of stations (stations.txt). The mask map is a boolean map with 1 and 0. 1 is used for all (and only) the pixels hydrologically connected to one of the stations. The resulting mask map is in PCRaster format. 
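The script itself falls outside the README hunks shown in this diff. A minimal sketch of what such a script might look like is given below; the helper names `mask_from_ldd` (in `lisfloodutilities.cutmaps.cutlib`) and `convert` (in `lisfloodutilities.nc2pcr`), their signatures and the test-data paths are all assumptions, not confirmed by this diff:

```Python
# Sketch only: function names, signatures and paths below are assumptions.
from lisfloodutilities.cutmaps.cutlib import mask_from_ldd
from lisfloodutilities.nc2pcr import convert

ldd = 'tests/data/cutmaps/ldd_eu.nc'          # LDD in NetCDF format
clonemap = 'tests/data/cutmaps/area_eu.map'   # PCRaster clone map
stations = 'tests/data/cutmaps/stations.txt'  # tab-separated stations file

# convert the NetCDF LDD to PCRaster format first (assumed signature)
ldd_pcr = convert(ldd, clonemap, 'ldd_eu.map', is_ldd=True)[0]

# derive the boolean mask of pixels hydrologically connected to the stations
mask, outlets_nc, mask_nc = mask_from_ldd(ldd_pcr, stations)
print(mask)
```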
diff --git a/bin/catchstats b/bin/catchstats
new file mode 100644
index 0000000..732b318
--- /dev/null
+++ b/bin/catchstats
@@ -0,0 +1,14 @@
+#!python
+
+import os
+import sys
+
+current_dir = os.path.dirname(os.path.abspath(__file__))
+src_dir = os.path.normpath(os.path.join(current_dir, '../src/'))
+if os.path.exists(src_dir):
+    sys.path.append(src_dir)
+
+from lisfloodutilities.catchstats import main_script
+
+if __name__ == '__main__':
+    main_script()
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 020b922..4365cba 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
attrs>=19.3.0
cffi>=1.15.1
+cfgrib>=0.9.11.0
cftime>=1.0.4.2
cloudpickle>=2.2.1
coverage>=6.0
diff --git a/setup.py b/setup.py
index 7ffbbdf..d669cb3 100644
--- a/setup.py
+++ b/setup.py
@@ -128,7 +128,10 @@ def run(self):
          'nc2pcr: Convert netCDF files to PCRaster format; '
          'cutmaps: cut netCDF files; '
          'compare: compare two sets of netCDF files; '
-          'ncextract: extract values from netCDF files; ',
+          'thresholds: compute discharge return period thresholds; '
+          'gridding: interpolate meteo variables observations; '
+          'ncextract: extract values from netCDF files; '
+          'catchstats: calculate catchment statistics; ',
    long_description=long_description,
    long_description_content_type='text/markdown',
    setup_requires=[
@@ -145,7 +148,7 @@ def run(self):
    keywords=['netCDF4', 'PCRaster', 'mapstack', 'lisflood', 'efas', 'glofas', 'ecmwf', 'copernicus'],
    license='EUPL 1.2',
    url='https://github.com/ec-jrc/lisflood-utilities',
-    scripts=['bin/pcr2nc', 'bin/cutmaps', 'bin/compare', 'bin/nc2pcr', 'bin/thresholds', 'bin/gridding', 'bin/cddmap', 'bin/ncextract',],
+    scripts=['bin/pcr2nc', 'bin/cutmaps', 'bin/compare', 'bin/nc2pcr', 'bin/thresholds', 'bin/gridding', 'bin/cddmap', 'bin/ncextract', 'bin/catchstats',],
    zip_safe=True,
    classifiers=[
        # complete classifier list: http://pypi.python.org/pypi?%3Aaction=list_classifiers
diff --git a/src/lisfloodutilities/catchstats/__init__.py b/src/lisfloodutilities/catchstats/__init__.py
new file mode 100644
index 0000000..fbe7a06
--- /dev/null
+++ b/src/lisfloodutilities/catchstats/__init__.py
@@ -0,0 +1,18 @@
+"""
+
+Copyright 2019-2023 European Union
+
+Licensed under the EUPL, Version 1.2 or as soon they will be approved by the European Commission subsequent versions of the EUPL (the "Licence");
+
+You may not use this work except in compliance with the Licence.
+You may obtain a copy of the Licence at:
+
+https://joinup.ec.europa.eu/sites/default/files/inline-files/EUPL%20v1_2%20EN(1).txt
+
+Unless required by applicable law or agreed to in writing, software distributed under the Licence is distributed on an "AS IS" basis,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the Licence for the specific language governing permissions and limitations under the Licence.
+
+"""
+
+from .catchstats import *
diff --git a/src/lisfloodutilities/catchstats/catchstats.py b/src/lisfloodutilities/catchstats/catchstats.py
new file mode 100644
index 0000000..262268b
--- /dev/null
+++ b/src/lisfloodutilities/catchstats/catchstats.py
@@ -0,0 +1,272 @@
+"""
+Copyright 2019-2023 European Union
+Licensed under the EUPL, Version 1.2 or as soon they will be approved by the European Commission subsequent versions of the EUPL (the "Licence");
+You may not use this work except in compliance with the Licence. 
+You may obtain a copy of the Licence at:
+https://joinup.ec.europa.eu/sites/default/files/inline-files/EUPL%20v1_2%20EN(1).txt
+Unless required by applicable law or agreed to in writing, software distributed under the Licence is distributed on an "AS IS" basis,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the Licence for the specific language governing permissions and limitations under the Licence.
+
+"""
+
+import argparse
+import os
+from pathlib import Path
+import pandas as pd
+import sys
+import time
+import xarray as xr
+from typing import Dict, List, Union, Optional
+# from tqdm.auto import tqdm
+
+
+def read_inputmaps(inputmaps: Union[str, Path]) -> xr.Dataset:
+    """It reads the input maps in NetCDF format from the input directory
+
+    Parameters:
+    -----------
+    inputmaps: str or pathlib.Path
+        directory that contains the input NetCDF files whose statistics will be computed. These files can be static (without time dimension) or dynamic (with time dimension)
+
+    Returns:
+    --------
+    ds: xr.Dataset
+    """
+
+    inputmaps = Path(inputmaps)
+    if not inputmaps.is_dir():
+        print(f'ERROR: {inputmaps} is missing or not a directory!')
+        sys.exit(1)
+
+    filepaths = list(inputmaps.glob('*.nc'))
+    if not filepaths:
+        print(f'ERROR: No NetCDF files found in "{inputmaps}"')
+        sys.exit(2)
+
+    print(f'{len(filepaths)} input NetCDF files found in "{inputmaps}"')
+
+    try:
+        # for dynamic maps
+        ds = xr.open_mfdataset(filepaths, chunks='auto', parallel=True, engine='netcdf4')
+        # chunks is set to auto for general purpose processing
+        # it could be optimized depending on input NetCDF
+    except Exception:
+        # for static maps
+        ds = xr.Dataset({file.stem.split('_')[0]: xr.open_dataset(file, engine='netcdf4')['Band1'] for file in filepaths})
+        if 'wgs_1984' in ds:
+            ds = ds.drop_vars('wgs_1984')
+
+    return ds
+
+def read_masks(mask: Union[str, Path]) -> Dict[int, xr.DataArray]:
+    """It loads the catchment masks in NetCDF format from the input directory
+
+    Parameters:
+    -----------
+    mask: str or pathlib.Path
+        directory that contains the NetCDF files that define the catchment boundaries. These files can be the output of the `cutmaps` tool
+
+    Returns:
+    --------
+    masks: dictionary of xr.DataArray
+        keys are the catchment IDs and values are boolean maps of the catchments
+    """
+
+    # check masks
+    mask = Path(mask)
+    if not mask.is_dir():
+        print(f'ERROR: {mask} is not a directory!')
+        sys.exit(1)
+
+    maskpaths = list(mask.glob('*.nc'))
+    if not maskpaths:
+        print(f'ERROR: No NetCDF files found in "{mask}"')
+        sys.exit(2)
+
+    print(f'{len(maskpaths)} mask NetCDF files found in "{mask}"')
+
+    # load masks
+    masks = {}
+    for maskpath in maskpaths:
+        ID = int(maskpath.stem)
+        try:
+            try:
+                aoi = xr.open_dataset(maskpath, engine='netcdf4')['Band1']
+            except Exception:
+                aoi = xr.open_dataarray(maskpath, engine='netcdf4')
+            aoi = xr.where(aoi.notnull(), 1, aoi)
+            masks[ID] = aoi
+        except Exception as e:
+            print(f'ERROR: The mask {maskpath} could not be read: {e}')
+            continue
+
+    return masks
+
+def read_pixarea(pixarea: Union[str, Path]) -> xr.DataArray:
+    """It reads the LISFLOOD pixel area static map
+
+    Parameters:
+    -----------
+    pixarea: string or Path
+        a NetCDF file with pixel area used to compute weighted statistics. 
It is specifically meant for geographic projection systems, where the area of a pixel varies with latitude
+
+    Returns:
+    --------
+    weight: xr.DataArray
+    """
+
+    pixarea = Path(pixarea)
+    if not pixarea.is_file():
+        print(f'ERROR: {pixarea} is not a file!')
+        sys.exit(1)
+
+    try:
+        weight = xr.open_dataset(pixarea, engine='netcdf4')['Band1']
+    except Exception as e:
+        print(f'ERROR: The weighing map "{pixarea}" could not be loaded: {e}')
+        sys.exit(2)
+
+    return weight
+
+def catchment_statistics(maps: Union[xr.DataArray, xr.Dataset],
+                         masks: Dict[int, xr.DataArray],
+                         statistic: Union[str, List[str]],
+                         weight: Optional[xr.DataArray] = None,
+                         output: Optional[Union[str, Path]] = None,
+                         overwrite: bool = False
+                         ) -> Optional[xr.Dataset]:
+    """
+    Given a set of input maps and catchment masks, it computes catchment statistics.
+
+    Parameters:
+    -----------
+    maps: xarray.DataArray or xarray.Dataset
+        map or set of maps from which catchment statistics will be computed
+    masks: dictionary of xr.DataArray
+        a set of catchment masks. The tool `cutmaps` in this repository can be used to generate them
+    statistic: string or list of strings
+        statistics to be computed. Only some statistics are available: 'mean', 'sum', 'std', 'var', 'min', 'max', 'median', 'count'
+    weight: optional, xr.DataArray
+        map used to weight each pixel in "maps" before computing the statistics. It is meant to take into account the varying pixel area in geographic projections
+    output: optional, str or pathlib.Path
+        directory where the resulting NetCDF files will be saved. If not provided, the results are returned as a xr.Dataset
+    overwrite: boolean
+        whether to overwrite or skip catchments whose output NetCDF file already exists. By default it is False, so existing catchments are skipped
+
+    Returns:
+    --------
+    An xr.Dataset of all catchment statistics, or a NetCDF file for each catchment in the "masks" dictionary
+    """
+
+    start_time = time.perf_counter()
+
+    if isinstance(maps, xr.DataArray):
+        maps = xr.Dataset({maps.name: maps})
+
+    # check statistic
+    if isinstance(statistic, str):
+        statistic = [statistic]
+    possible_stats = ['mean', 'sum', 'std', 'var', 'min', 'max', 'median', 'count']
+    assert all(stat in possible_stats for stat in statistic), "All values in 'statistic' should be one of these: {0}".format(', '.join(possible_stats))
+    stats_dict = {var: statistic for var in maps}
+
+    # output directory
+    if output is None:
+        results = []
+    else:
+        output = Path(output)
+        output.mkdir(parents=True, exist_ok=True)
+
+    # define coordinates and variables of the resulting Dataset
+    dims = dict(maps.dims)
+    dimnames = [dim.lower() for dim in dims]
+    if 'lat' in dimnames and 'lon' in dimnames:
+        x_dim, y_dim = 'lon', 'lat'
+    else:
+        x_dim, y_dim = 'x', 'y'
+    del dims[x_dim]
+    del dims[y_dim]
+    coords = {dim: maps[dim] for dim in dims}
+    variables = [f'{var}_{stat}' for var, stats in stats_dict.items() for stat in stats]
+
+    # compute statistics for each catchment
+    # for ID in tqdm(masks.keys(), desc='processing catchments'):
+    for ID in masks.keys():
+
+        if output is not None:
+            fileout = output / f'{ID:04}.nc'
+            if fileout.exists() and not overwrite:
+                print(f'Output file {fileout} already exists. 
Moving forward to the next catchment')
+                continue
+
+        # create empty Dataset
+        coords.update({'id': [ID]})
+        maps_aoi = xr.Dataset({var: xr.DataArray(coords=coords, dims=coords.keys()) for var in variables})
+
+        # apply mask to the dataset
+        aoi = masks[ID]
+        masked_maps = maps.sel({x_dim: aoi[x_dim], y_dim: aoi[y_dim]}).where(aoi == 1)
+        masked_maps = masked_maps.compute()
+
+        # apply weighting
+        if weight is not None:
+            masked_weight = weight.sel({x_dim: aoi[x_dim], y_dim: aoi[y_dim]}).where(aoi == 1)
+            weighted_maps = masked_maps.weighted(masked_weight.fillna(0))
+
+        # compute statistics
+        for var, stats in stats_dict.items():
+            for stat in stats:
+                if (stat in ['mean', 'sum', 'std', 'var']) and (weight is not None):
+                    x = getattr(weighted_maps, stat)(dim=[x_dim, y_dim])[var]
+                else:
+                    x = getattr(masked_maps, stat)(dim=[x_dim, y_dim])[var]
+                maps_aoi[f'{var}_{stat}'].loc[{'id': ID}] = x
+
+        # save results
+        if output is None:
+            results.append(maps_aoi)
+        else:
+            maps_aoi.to_netcdf(fileout)
+
+    end_time = time.perf_counter()
+    elapsed_time = end_time - start_time
+    print(f"Time elapsed: {elapsed_time:0.2f} seconds")
+
+    if output is None:
+        results = xr.concat(results, dim='id')
+        return results
+
+def main(argv=sys.argv):
+    prog = os.path.basename(argv[0])
+    parser = argparse.ArgumentParser(
+        description="""
+        Utility to compute catchment statistics from (multiple) NetCDF files.
+        The mask maps are NetCDF files with values in the area of interest and NaN elsewhere.
+        The area map is optional and accounts for varying pixel area with latitude.
+        """,
+        prog=prog,
+    )
+    parser.add_argument("-i", "--input", required=True, help="Directory containing the input NetCDF files")
+    parser.add_argument("-m", "--mask", required=True, help="Directory containing the mask NetCDF files")
+    parser.add_argument("-s", "--statistic", nargs='+', required=True, help='List of statistics to be computed. Possible values: mean, sum, std, var, min, max, median, count')
+    parser.add_argument("-o", "--output", required=True, help="Directory where the output NetCDF files will be saved")
+    parser.add_argument("-a", "--area", required=False, default=None, help="NetCDF file of pixel area used to weigh the statistics")
+    parser.add_argument("-W", "--overwrite", action="store_true", help="Overwrite existing output files")
+
+    args = parser.parse_args()
+
+    try:
+        maps = read_inputmaps(args.input)
+        masks = read_masks(args.mask)
+        weight = read_pixarea(args.area) if args.area is not None else None
+        catchment_statistics(maps, masks, args.statistic, weight=weight, output=args.output, overwrite=args.overwrite)
+    except Exception as e:
+        print(f'ERROR: {e}')
+        sys.exit(1)
+
+def main_script():
+    sys.exit(main())
+
+if __name__ == "__main__":
+    main_script()
diff --git a/src/lisfloodutilities/cutmaps/main.py b/src/lisfloodutilities/cutmaps/main.py
index f1323fe..d7762e5 100644
--- a/src/lisfloodutilities/cutmaps/main.py
+++ b/src/lisfloodutilities/cutmaps/main.py
@@ -70,9 +70,9 @@ def add_args(self):
        group_filelist.add_argument("-f", "--folder", help='Directory with netCDF files to be cut')
        group_filelist.add_argument("-F", "--file", help='netCDF file to be cut')
-        group_filelist.add_argument("-S", "--static-data",
-                                    help='Directory with EFAS/GloFAS static maps. 
'
-                                         'Output files will have same directories structure')
+        group_filelist.add_argument("-S", "--subdir",
+                                    help='Directory containing folders. '
+                                         'Output files will have the same directory-folders structure')
        self.add_argument("-o", "--outpath", help='path where to save cut files', default='./cutmaps_out', required=True)
@@ -93,7 +93,7 @@ def main(cliargs):
    input_folder = args.folder
    input_file = args.file
-    static_data_folder = args.static_data
+    static_data_folder = args.subdir
    overwrite = args.overwrite
    pathout = args.outpath
    if not os.path.exists(pathout):
diff --git a/src/lisfloodutilities/ncextract/ncextract.py b/src/lisfloodutilities/ncextract/ncextract.py
index 8c12456..2882359 100644
--- a/src/lisfloodutilities/ncextract/ncextract.py
+++ b/src/lisfloodutilities/ncextract/ncextract.py
@@ -11,58 +11,142 @@
"""

import argparse
-import glob
-import os
import pandas as pd
+import os
import sys
import time
import xarray as xr
+import cfgrib
+from pathlib import Path
+from typing import Union, Optional
+
+def read_points(inputcsv: Union[str, Path]) -> xr.Dataset:
+    """It reads a CSV file with coordinates of points: gauging stations, reservoirs...
+
+    Parameters:
+    -----------
+    inputcsv: string or pathlib.Path
+        a CSV indicating the points for which the time series will be extracted. It must contain three columns: the point identifier and its two coordinates

-def extract(inputcsv, directory, outputfile, nc):
-    start_time = time.perf_counter()
+    Returns:
+    --------
+    poi_xr: xarray.Dataset
+        the input data converted into an xarray object
+    """

    if not os.path.isfile(inputcsv):
        print(f'ERROR: {inputcsv} is missing!')
-        sys.exit(0)
+        sys.exit(1)
+
+    try:
+        poi_df = pd.read_csv(inputcsv)
+        original_columns = poi_df.columns.copy()
+        poi_df.columns = original_columns.str.lower()
+        # find columns representing coordinates
+        coord_1 = [col for col in poi_df.columns if col.startswith('lon') or col.startswith('x')][0]
+        coord_2 = [col for col in poi_df.columns if col.startswith('lat') or col.startswith('y')][0]
+        # find the column representing the point ID
+        idx_col = poi_df.columns.difference([coord_1, coord_2])[0]
+        # convert to xarray.Dataset
+        poi_xr = poi_df.set_index(idx_col)[[coord_1, coord_2]].to_xarray()
+        rename_dim = {idx_col: col for col in original_columns if col.lower() == idx_col}
+        poi_xr = poi_xr.rename(rename_dim)
+    except Exception:
+        print('ERROR: Please check that the CSV file is formatted correctly!')
+        sys.exit(2)
+
+    return poi_xr
+
+
+
+def read_inputmaps(directory: Union[str, Path]) -> xr.Dataset:
+    """It reads a series of input files (NetCDF or GRIB) and concatenates them into a single dataset
+
+    Parameters:
+    -----------
+    directory: string or pathlib.Path
+        the directory containing the input files, which can be either in NetCDF or GRIB format
+
+    Returns:
+    --------
+    maps: xarray.Dataset
+        containing the concatenation of all the input maps
+    """
+
+    pattern_engine = {'*.nc': 'netcdf4',
+                      '*.grib': 'cfgrib'}

    if not os.path.isdir(directory):
        print(f'ERROR: {directory} is missing or not a directory!')
-        sys.exit(0)
+        sys.exit(1)
+    else:
+        directory = Path(directory)

    filepaths = []
-    for file in glob.glob(os.path.join(directory, '*.nc')):
-        print(file)
-        filepaths.append(file)
+    for pattern, engine in pattern_engine.items():
+        filepaths = list(directory.glob(pattern))
+        if len(filepaths) > 0:
+            break
    if not filepaths:
-        print(f'ERROR: No NetCDF files found in {directory}')
-        sys.exit(0)
+        print(f'ERROR: No NetCDF/GRIB file found in {directory}')
+        sys.exit(2)

-    try:
-        poi = 
pd.read_csv(inputcsv)
-        poi_indexer = poi.set_index('id')[['lat', 'lon']].to_xarray()
-    except:
-        print('ERROR: Please check that CSV file is formatted correctly!')
-        sys.exit(0)
+    print(f'{len(filepaths)} input {engine} file(s) found in "{directory}"')

    # chunks is set to auto for general purpose processing
    # it could be optimized depending on input NetCDF
-    ds = xr.open_mfdataset(filepaths, chunks='auto', parallel=True)
+    maps = xr.open_mfdataset(filepaths, engine=engine, chunks='auto', parallel=True)
+
+    return maps
+
+
+
+def extract_timeseries(maps: xr.Dataset,
+                       poi: xr.Dataset,
+                       outputfile: Optional[Union[str, Path]] = None
+                       ) -> Optional[xr.Dataset]:
+    """It extracts the time series of a set of points from a stack of input maps
+
+    Parameters:
+    -----------
+    maps: xarray.Dataset
+        the time stack of input maps from which the time series will be extracted
+    poi: xarray.Dataset
+        a Dataset indicating the coordinates of the points of interest. It must have only two variables (the coordinates), and the names of these variables must be dimensions in "maps"
+    outputfile: optional, string or pathlib.Path
+        the file where the results will be saved. It can be either a CSV or a NetCDF file
+
+    Returns:
+    --------
+    By default, it returns an xarray.Dataset with the extracted time series. If "outputfile" is provided instead, results will be saved to a file (CSV or NetCDF)
+    """

-    ds_poi = ds.sel(lat=poi_indexer.lat, lon=poi_indexer.lon, method='nearest')
-    print(ds_poi)
-    print("Processing...")
+    coord_1, coord_2 = list(poi)
+    if not all(coord in maps.coords for coord in [coord_1, coord_2]):
+        print(f'ERROR: The variables in "poi" (coordinates) are not coordinates in "maps"')
+        sys.exit(1)
+
+    # extract time series
+    maps_poi = maps.sel({coord_1: poi[coord_1], coord_2: poi[coord_2]}, method='nearest')
+
+    if outputfile is None:
+        return maps_poi.compute()

-    if nc:
-        ds_poi.to_netcdf(outputfile)
    else:
-        df = ds_poi.to_dataframe()
-        df_reset = df.reset_index()
-        df_reset.to_csv(outputfile, index=False)
+        outputfile = Path(outputfile)
+        if outputfile.suffix == '.nc':
+            maps_poi.to_netcdf(outputfile)
+        elif outputfile.suffix == '.csv':
+            df = maps_poi.to_dataframe()
+            df_reset = df.reset_index()
+            df_reset.to_csv(outputfile, index=False)
+        else:
+            print('ERROR: the extension of the output file must be either ".nc" or ".csv"')
+            sys.exit(2)
+        print(f'Results exported as {outputfile}')

-    end_time = time.perf_counter()
-    elapsed_time = end_time - start_time
-    print(f"Time elapsed: {elapsed_time:0.2f} seconds")

def main(argv=sys.argv):
    prog = os.path.basename(argv[0])
@@ -70,7 +154,7 @@ def main(argv=sys.argv):
        description="""
        Utility to extract time series of values from
        (multiple) NetCDF files at specific coordinates.
-        Coordinates of points of interest must be
+        Coordinates of points of interest must be
        included in a CSV file with at least 3 columns
        named id, lat, lon.
        """,
        prog=prog,
    )
    parser.add_argument("-i", "--input", required=True, help="Input CSV file (id, lat, lon)")
    parser.add_argument("-d", "--directory", required=True, help="Input directory with .nc files")
-    parser.add_argument("-o", "--output", required=True, help="Output file (default is CSV, use -nc for NetCDF)")
-    parser.add_argument("-nc", "--nc", action='store_true', help='Output to NetCDF')
+    parser.add_argument("-o", "--output", required=True, help="Output file. 
Two extensions are supported: .csv or .nc")

    args = parser.parse_args()
-    extract(args.input, args.directory, args.output, args.nc)
+    try:
+        start_time = time.perf_counter()
+
+        print('Reading input CSV...')
+        points = read_points(args.input)
+        print('Reading input maps...')
+        maps = read_inputmaps(args.directory)
+        print(maps)
+        print('Processing...')
+        extract_timeseries(maps, points, args.output)
+
+        elapsed_time = time.perf_counter() - start_time
+        print(f"Time elapsed: {elapsed_time:0.2f} seconds")
+
+    except Exception as e:
+        print(f'ERROR: {e}')
+        sys.exit(1)

def main_script():
    sys.exit(main())

if __name__ == "__main__":
-    main_script()
\ No newline at end of file
+    main_script()
diff --git a/src/lisfloodutilities/water-demand-historic/README.md b/src/lisfloodutilities/water-demand-historic/README.md
index 76c67df..acf7d28 100644
--- a/src/lisfloodutilities/water-demand-historic/README.md
+++ b/src/lisfloodutilities/water-demand-historic/README.md
@@ -76,23 +76,23 @@ The module consists of five scripts:
The script can be run on a normal desktop PC, support is provided only for Linux. Physical memory requirements depend on the desired geographical extent and resolution of the maps dataset. For instance, the generation of the sectoral water demand dataset for the global domain at 3arcmin (or 0.05 degrees) resolution required 32 GB.

# Instructions
-
-Clone the repository:
+Install the main lisflood-utilities package in a Conda environment:
+```bash
+conda create --name <env_name> python=3.7 -c conda-forge
+conda activate <env_name>
+conda install -c conda-forge pcraster eccodes gdal
+pip install lisflood-utilities
+```
+Then clone the repository:
```
-git clone https://github.com/ec-jrc/lisflood-utilities/water-demand-historic
-cd lisflood-utilities/water-demand-historic
+git clone --single-branch --branch master https://github.com/ec-jrc/lisflood-utilities
+cd lisflood-utilities/src/lisfloodutilities/water-demand-historic
```
Produce a configuration file with the correct paths and folders (based on the [included example](config_global_0p1deg.cfg)).
-Create and activate a Conda environment and run the scripts as follows:
+Run the scripts as follows:
```
-conda create --name <env_name> --file requirements.txt
-conda activate <env_name>
python step1_population_density.py
python step2_domestic_demand.py
...
-``` -If the environment creation step fails, the following might work: -``` -conda create -n -c conda-forge geopandas h5py pandas numpy netcdf4 matplotlib rasterio scikit-image openpyxl geopy ``` \ No newline at end of file diff --git a/src/lisfloodutilities/water-demand-historic/requirements.txt b/src/lisfloodutilities/water-demand-historic/requirements.txt deleted file mode 100644 index d7818c3..0000000 --- a/src/lisfloodutilities/water-demand-historic/requirements.txt +++ /dev/null @@ -1,189 +0,0 @@ -# This file may be used to create an environment using: -# $ conda create --name --file -# platform: win-64 -affine=2.3.0=py_0 -appdirs=1.4.4=pyh9f0ad1d_0 -attrs=21.2.0=pyhd8ed1ab_0 -blosc=1.21.0=h0e60522_0 -boost-cpp=1.74.0=h5b4e17d_5 -branca=0.4.2=pyhd8ed1ab_0 -brotli=1.0.9=h8ffe710_6 -brotli-bin=1.0.9=h8ffe710_6 -brotlipy=0.7.0=py39hb82d6ee_1003 -bzip2=1.0.8=h8ffe710_4 -c-blosc2=2.0.4=h09319c2_1 -ca-certificates=2021.10.8=h5b45459_0 -cached-property=1.5.2=hd8ed1ab_1 -cached_property=1.5.2=pyha770c72_1 -cairo=1.16.0=hb19e0ff_1008 -certifi=2021.10.8=py39hcbf5309_1 -cffi=1.15.0=py39h0878f49_0 -cfitsio=4.0.0=hd67004f_0 -cftime=1.5.1.1=py39h5d4886f_1 -charls=2.2.0=h39d44d4_0 -charset-normalizer=2.0.8=pyhd8ed1ab_0 -click=8.0.3=py39hcbf5309_1 -click-plugins=1.1.1=py_0 -cligj=0.7.2=pyhd8ed1ab_1 -cloudpickle=2.0.0=pyhd8ed1ab_0 -colorama=0.4.4=pyh9f0ad1d_0 -configparser=5.0.1=py_0 -cryptography=36.0.0=py39h7bc7c5c_0 -curl=7.80.0=h789b8ee_0 -cycler=0.11.0=pyhd8ed1ab_0 -cytoolz=0.11.2=py39hb82d6ee_1 -dask-core=2021.11.2=pyhd8ed1ab_0 -et_xmlfile=1.0.1=py_1001 -expat=2.4.1=h39d44d4_0 -fiona=1.8.20=py39hea8b339_2 -folium=0.12.1.post1=pyhd8ed1ab_0 -font-ttf-dejavu-sans-mono=2.37=hab24e00_0 -font-ttf-inconsolata=3.000=h77eed37_0 -font-ttf-source-code-pro=2.038=h77eed37_0 -font-ttf-ubuntu=0.83=hab24e00_0 -fontconfig=2.13.1=h1989441_1005 -fonts-conda-ecosystem=1=0 -fonts-conda-forge=1=0 -fonttools=4.28.2=py39hb82d6ee_0 -freetype=2.10.4=h546665d_1 -freexl=1.0.6=ha8e266a_0 -fsspec=2021.11.1=pyhd8ed1ab_0 -gdal=3.3.3=py39h3f5efd6_2 -geographiclib=1.52=pyhd8ed1ab_0 -geopandas=0.10.2=pyhd8ed1ab_1 -geopandas-base=0.10.2=pyha770c72_1 -geopy=2.2.0=pyhd8ed1ab_0 -geos=3.10.0=h39d44d4_0 -geotiff=1.7.0=h350af67_3 -gettext=0.19.8.1=ha2e2712_1008 -giflib=5.2.1=h8d14728_2 -h5py=3.6.0=nompi_py39hd4deaf1_100 -hdf4=4.2.15=h0e5069d_3 -hdf5=1.12.1=nompi_h2a0e4a3_102 -icu=68.2=h0e60522_0 -idna=3.1=pyhd3deb0d_0 -imagecodecs=2021.11.20=py39he391c9c_1 -imageio=2.9.0=py_0 -intel-openmp=2021.4.0=h57928b3_3556 -jbig=2.1=h8d14728_2003 -jinja2=3.0.3=pyhd8ed1ab_0 -joblib=1.1.0=pyhd8ed1ab_0 -jpeg=9d=he774522_0 -jxrlib=1.1=hfa6e2cd_2 -kealib=1.4.14=h8995ca9_3 -kiwisolver=1.3.2=py39h2e07f2f_1 -krb5=1.19.2=h20d022d_3 -lcms2=2.12=h2a16943_0 -lerc=3.0=h0e60522_0 -libaec=1.0.6=h39d44d4_0 -libblas=3.9.0=12_win64_mkl -libbrotlicommon=1.0.9=h8ffe710_6 -libbrotlidec=1.0.9=h8ffe710_6 -libbrotlienc=1.0.9=h8ffe710_6 -libcblas=3.9.0=12_win64_mkl -libclang=11.1.0=default_h5c34c98_1 -libcurl=7.80.0=h789b8ee_0 -libdeflate=1.8=h8ffe710_0 -libffi=3.4.2=h8ffe710_5 -libgdal=3.3.3=he0d6347_2 -libglib=2.70.1=h3be07f2_0 -libiconv=1.16=he774522_0 -libkml=1.3.0=h9859afa_1014 -liblapack=3.9.0=12_win64_mkl -libnetcdf=4.8.1=nompi_h1cc8e9d_101 -libpng=1.6.37=ha81a0f5_2 -libpq=13.5=hfcc5ef8_0 -librttopo=1.1.0=h22ffb85_7 -libspatialindex=1.9.3=h39d44d4_4 -libspatialite=5.0.1=h9c61790_10 -libssh2=1.10.0=h680486a_2 -libtiff=4.3.0=hd413186_2 -libwebp-base=1.2.1=h8ffe710_0 -libxml2=2.9.12=hf5bbc77_1 -libzip=1.8.0=hfed4ece_1 -libzlib=1.2.11=h8ffe710_1013 
-libzopfli=1.0.3=ha925a31_0
-locket=0.2.0=py_2
-lz4-c=1.9.3=h8ffe710_1
-m2w64-gcc-libgfortran=5.3.0=6
-m2w64-gcc-libs=5.3.0=7
-m2w64-gcc-libs-core=5.3.0=7
-m2w64-gmp=6.1.0=2
-m2w64-libwinpthread-git=5.0.0.4634.697f757=2
-mapclassify=2.4.3=pyhd8ed1ab_0
-markupsafe=2.0.1=py39hb82d6ee_1
-matplotlib=3.5.0=py39hcbf5309_0
-matplotlib-base=3.5.0=py39h581301d_0
-mkl=2021.4.0=h0e2418a_729
-msys2-conda-epoch=20160418=1
-munch=2.5.0=py_0
-munkres=1.1.4=pyh9f0ad1d_0
-netcdf4=1.5.8=nompi_py39hf113b1f_101
-networkx=2.6.3=pyhd8ed1ab_1
-numpy=1.21.4=py39h6635163_0
-olefile=0.46=pyh9f0ad1d_1
-openjpeg=2.4.0=hb211442_1
-openpyxl=3.0.9=pyhd8ed1ab_0
-openssl=1.1.1l=h8ffe710_0
-packaging=21.3=pyhd8ed1ab_0
-pandas=1.3.4=py39h2e25243_1
-partd=1.2.0=pyhd8ed1ab_0
-pcre=8.45=h0e60522_0
-pillow=8.4.0=py39h916092e_0
-pip=21.3.1=pyhd8ed1ab_0
-pixman=0.40.0=h8ffe710_0
-pooch=1.5.2=pyhd8ed1ab_0
-poppler=21.09.0=h24fffdf_3
-poppler-data=0.4.11=hd8ed1ab_0
-postgresql=13.5=h1c22c4f_0
-proj=8.1.1=h1cfcee9_2
-pycparser=2.21=pyhd8ed1ab_0
-pyopenssl=21.0.0=pyhd8ed1ab_0
-pyparsing=3.0.6=pyhd8ed1ab_0
-pyproj=3.2.1=py39h39b2389_2
-pyqt=5.12.3=py39hcbf5309_8
-pyqt-impl=5.12.3=py39h415ef7b_8
-pyqt5-sip=4.19.18=py39h415ef7b_8
-pyqtchart=5.12=py39h415ef7b_8
-pyqtwebengine=5.12.1=py39h415ef7b_8
-pysocks=1.7.1=py39hcbf5309_4
-python=3.9.7=h7840368_3_cpython
-python-dateutil=2.8.2=pyhd8ed1ab_0
-python_abi=3.9=2_cp39
-pytz=2021.3=pyhd8ed1ab_0
-pywavelets=1.2.0=py39h5d4886f_1
-pyyaml=6.0=py39hb82d6ee_3
-qt=5.12.9=h5909a2a_4
-rasterio=1.2.10=py39h20dd13d_0
-requests=2.26.0=pyhd8ed1ab_1
-rtree=0.9.7=py39h09fdee3_3
-scikit-image=0.18.3=py39h2e25243_1
-scikit-learn=1.0.1=py39he931e04_2
-scipy=1.7.3=py39hc0c34ad_0
-setuptools=59.4.0=py39hcbf5309_0
-shapely=1.8.0=py39h2358463_2
-six=1.16.0=pyh6c4a22f_0
-snappy=1.1.8=ha925a31_3
-snuggs=1.4.7=py_0
-sqlite=3.37.0=h8ffe710_0
-tbb=2021.4.0=h2d74725_1
-threadpoolctl=3.0.0=pyh8a188c0_0
-tifffile=2021.11.2=pyhd8ed1ab_0
-tiledb=2.3.4=h78dabda_0
-tk=8.6.11=h8ffe710_1
-toolz=0.11.2=pyhd8ed1ab_0
-tornado=6.1=py39hb82d6ee_2
-tzdata=2021e=he74cb21_0
-ucrt=10.0.20348.0=h57928b3_0
-urllib3=1.26.7=pyhd8ed1ab_0
-vc=14.2=hc4473a8_5
-vs2015_runtime=14.29.30037=h902a5da_5
-wheel=0.37.0=pyhd8ed1ab_1
-win_inet_pton=1.1.0=py39hcbf5309_3
-xerces-c=3.2.3=h0e60522_4
-xyzservices=2021.11.0=pyhd8ed1ab_0
-xz=5.2.5=h62dcd97_1
-yaml=0.2.5=he774522_0
-zfp=0.5.5=h0e60522_8
-zlib=1.2.11=vc14h1cdd9ab_1
-zstd=1.5.0=h6255e5f_0
diff --git a/tests/data/catchstats/expected.csv b/tests/data/catchstats/expected.csv
new file mode 100644
index 0000000..b414571
--- /dev/null
+++ b/tests/data/catchstats/expected.csv
@@ -0,0 +1,6 @@
+id,elv_mean,elv_std,elv_min,elv_max,elv_count,ksat1_mean,ksat1_std,ksat1_min,ksat1_max,ksat1_count
+33,893.9243774414062,114.87911224365234,699.248779296875,1386.504638671875,308.0,191.7561492919922,14.313340187072754,142.97467041015625,237.45582580566406,308.0
+52,476.6605529785156,95.04017639160156,252.7063751220703,739.0236206054688,932.0,286.1943359375,48.91963577270508,151.40707397460938,509.8167419433594,932.0
+53,1077.2130126953125,214.9817352294922,790.4515991210938,1752.1634521484375,1118.0,319.20208740234375,59.7955322265625,199.0460205078125,481.0557556152344,1118.0
+68,653.2332153320312,381.9631652832031,11.078484535217285,3089.394287109375,17418.0,241.17037963867188,69.94161987304688,111.34638977050781,546.927978515625,17418.0
+84,959.306640625,233.96337890625,411.6016540527344,2288.725341796875,28094.0,282.0586242675781,127.93054962158203,75.86146545410156,932.9584350585938,28094.0
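The `expected.csv` fixture above holds one row per catchment `id` and one column per `<variable>_<statistic>` pair (for example `elv_mean`, `ksat1_std`). A short sketch of how it can be loaded, mirroring the test added further below:

```python
import pandas as pd

# One row per catchment id; columns follow the '<variable>_<statistic>'
# pattern, e.g. 'elv_mean' or 'ksat1_max' (values produced by catchstats).
expected = pd.read_csv('tests/data/catchstats/expected.csv', index_col='id')
print(expected.loc[33, 'elv_mean'])  # mean elevation of catchment 33
```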
diff --git a/tests/data/catchstats/maps/elv_iberian_01min.nc b/tests/data/catchstats/maps/elv_iberian_01min.nc
new file mode 100644
index 0000000..72063e4
Binary files /dev/null and b/tests/data/catchstats/maps/elv_iberian_01min.nc differ
diff --git a/tests/data/catchstats/maps/ksat1_f_iberian_01min.nc b/tests/data/catchstats/maps/ksat1_f_iberian_01min.nc
new file mode 100644
index 0000000..9bb7d5c
Binary files /dev/null and b/tests/data/catchstats/maps/ksat1_f_iberian_01min.nc differ
diff --git a/tests/data/catchstats/masks/0033.nc b/tests/data/catchstats/masks/0033.nc
new file mode 100644
index 0000000..a76e6c7
Binary files /dev/null and b/tests/data/catchstats/masks/0033.nc differ
diff --git a/tests/data/catchstats/masks/0052.nc b/tests/data/catchstats/masks/0052.nc
new file mode 100644
index 0000000..a314787
Binary files /dev/null and b/tests/data/catchstats/masks/0052.nc differ
diff --git a/tests/data/catchstats/masks/0053.nc b/tests/data/catchstats/masks/0053.nc
new file mode 100644
index 0000000..325754c
Binary files /dev/null and b/tests/data/catchstats/masks/0053.nc differ
diff --git a/tests/data/catchstats/masks/0068.nc b/tests/data/catchstats/masks/0068.nc
new file mode 100644
index 0000000..cc05885
Binary files /dev/null and b/tests/data/catchstats/masks/0068.nc differ
diff --git a/tests/data/catchstats/masks/0084.nc b/tests/data/catchstats/masks/0084.nc
new file mode 100644
index 0000000..7ab0e20
Binary files /dev/null and b/tests/data/catchstats/masks/0084.nc differ
diff --git a/tests/data/catchstats/pixarea_iberian_01min.nc b/tests/data/catchstats/pixarea_iberian_01min.nc
new file mode 100644
index 0000000..1129900
Binary files /dev/null and b/tests/data/catchstats/pixarea_iberian_01min.nc differ
diff --git a/tests/test_catchstats.py b/tests/test_catchstats.py
new file mode 100644
index 0000000..5b3cb8d
--- /dev/null
+++ b/tests/test_catchstats.py
@@ -0,0 +1,25 @@
+import unittest
+from pathlib import Path
+import pandas as pd
+import pandas.testing as pdt
+from lisfloodutilities.catchstats import read_inputmaps, read_masks, read_pixarea, catchment_statistics
+
+class TestCatchStats(unittest.TestCase):
+
+    path = Path('tests/data/catchstats')
+
+    def test_catchstats(self):
+
+        # compute test values
+        maps = read_inputmaps(self.path / 'maps')
+        masks = read_masks(self.path / 'masks')
+        weight = read_pixarea(self.path / 'pixarea_iberian_01min.nc')
+        test = catchment_statistics(maps, masks, ['mean', 'std', 'min', 'max', 'count'], weight=weight, output=None).to_pandas()
+
+        # load expected values
+        expected = pd.read_csv(self.path / 'expected.csv', index_col='id')
+        expected.index = expected.index.astype(test.index.dtype)
+
+        # check: assert directly so a mismatch actually fails the test
+        pdt.assert_frame_equal(test, expected)
+
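The new test doubles as a usage example for the `catchstats` API. A standalone sketch under the same assumptions (functions and call signatures exactly as imported in `tests/test_catchstats.py`, file layout as in `tests/data/catchstats`):

```python
from lisfloodutilities.catchstats import (
    read_inputmaps, read_masks, read_pixarea, catchment_statistics,
)

# Variable maps (NetCDF), one boolean mask per catchment, and a pixel-area
# map passed as weight so the statistics are area-weighted.
maps = read_inputmaps('tests/data/catchstats/maps')
masks = read_masks('tests/data/catchstats/masks')
weight = read_pixarea('tests/data/catchstats/pixarea_iberian_01min.nc')

# With output=None the result is kept in memory (the test then converts it
# with .to_pandas()) rather than written to disk.
stats = catchment_statistics(maps, masks, ['mean', 'std', 'min', 'max'],
                             weight=weight, output=None)
print(stats.to_pandas().head())
```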
diff --git a/tests/test_ncextract.py b/tests/test_ncextract.py
index 556eeff..465adbe 100644
--- a/tests/test_ncextract.py
+++ b/tests/test_ncextract.py
@@ -1,7 +1,6 @@
 import unittest
-from lisfloodutilities.compare.nc import NetCDFComparator
-
-from lisfloodutilities.ncextract import extract
+# from lisfloodutilities.compare.nc import NetCDFComparator
+from lisfloodutilities.ncextract import read_points, read_inputmaps, extract_timeseries
 import csv
 
 class TestExtract(unittest.TestCase):
@@ -12,15 +11,15 @@ def compare_csv_files(self, file1, file2):
         with open(file1, 'r') as f1, open(file2, 'r') as f2:
             reader1 = csv.reader(f1)
             reader2 = csv.reader(f2)
-
+
             for row1, row2 in zip(reader1, reader2):
                 if row1 != row2:
                     return False
-
+
             # Check if both files have the same number of rows
             if len(list(reader1)) != len(list(reader2)):
                 return False
-
+
             return True
 
     def test_extract_csv(self):
@@ -28,9 +27,11 @@ def test_extract_csv(self):
         inputcsv = 'tests/data/ncextract/stations.csv'
         datasets = 'tests/data/ncextract/datasets'
         outputfile = 'tests/data/output.csv'
         expected = 'tests/data/ncextract/expected.csv'
-        extract(inputcsv, datasets, outputfile, nc=False)
+        poi = read_points(inputcsv)
+        maps = read_inputmaps(datasets)
+        extract_timeseries(maps, poi, outputfile)
         assert self.compare_csv_files(outputfile, expected)
-
+
     # def test_extract_nc(self):
     #     inputcsv = 'tests/data/ncextract/stations.csv'
    #     datasets = 'tests/data/ncextract/datasets'
@@ -39,4 +40,4 @@ def test_extract_csv(self):
     #     extract(inputcsv, datasets, outputfile, nc=True)
     #     comp = NetCDFComparator(None, for_testing=True)
     #     comp.compare_files(outputfile, expected)
-    #     assert comp.errors == None
\ No newline at end of file
+    #     assert comp.errors == None
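The refactored `ncextract` API exercised by this test can likewise be called directly from Python. A minimal sketch using the test fixtures (the three functions are exactly those imported above; the output format follows the file extension, `.csv` or `.nc`, as stated in the CLI help text):

```python
from lisfloodutilities.ncextract import read_points, read_inputmaps, extract_timeseries

points = read_points('tests/data/ncextract/stations.csv')  # points of interest (CSV of coordinates)
maps = read_inputmaps('tests/data/ncextract/datasets')     # directory with the input datasets
extract_timeseries(maps, points, 'output.csv')             # extension selects the output format
```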