diff --git a/DESCRIPTION b/DESCRIPTION index 11181aa..195865f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ncdfgeom Type: Package Title: 'NetCDF' Geometry and Time Series -Version: 1.2.0 +Version: 1.3.0 Authors@R: c(person("David", "Blodgett", role = c("aut", "cre"), email = "dblodgett@usgs.gov"), person("Luke", "Winslow", role = "ctb")) diff --git a/docs/404.html b/docs/404.html index 16a73ca..21aaa2c 100644 --- a/docs/404.html +++ b/docs/404.html @@ -6,7 +6,7 @@ Page not found (404) • ncdfgeom - + @@ -32,7 +32,7 @@ ncdfgeom - 1.2.0 + 1.3.0 @@ -109,7 +109,7 @@

Page not found (404)

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/DISCLAIMER.html b/docs/DISCLAIMER.html index 424a601..db4cf1d 100644 --- a/docs/DISCLAIMER.html +++ b/docs/DISCLAIMER.html @@ -1,5 +1,5 @@ -Disclaimer • ncdfgeomDisclaimer • ncdfgeom @@ -17,7 +17,7 @@ ncdfgeom - 1.2.0 + 1.3.0 @@ -87,7 +87,7 @@

Disclaimer

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/articles/geometry.html b/docs/articles/geometry.html index 05a7031..414736f 100644 --- a/docs/articles/geometry.html +++ b/docs/articles/geometry.html @@ -6,7 +6,7 @@ Reading and Writing NetCDF-CF Geometry • ncdfgeom - + @@ -33,7 +33,7 @@ ncdfgeom - 1.2.0 + 1.3.0 @@ -55,6 +55,9 @@
  • Reading and Writing NetCDF-CF Geometry
  • +
  • + Area Weight Generation for Polygon Intersections +
  • Reading and Writing Timeseries
  • @@ -131,7 +134,7 @@

    Load Spatial Data
    -(vars <- ncmeta::nc_vars(example_file))
    +(vars <- ncmeta::nc_vars(example_file))
     #> # A tibble: 5 × 5
     #>      id name         type      ndims natts
     #>   <int> <chr>        <chr>     <int> <int>
    @@ -146,7 +149,7 @@ 

    Load Spatial Data= "station", variables = vars$name) -> example_file

    Now the NetCDF file looks like:

    -
    #> netcdf file69986203fbb {
    +
    #> netcdf file63806c813359 {
     #> dimensions:
     #>  maxStrlen64 = 64 ;
     #>  station = 2 ;
    @@ -681,7 +684,7 @@ 

    Multiple MultiPolygons wi

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.9.

    diff --git a/docs/articles/index.html b/docs/articles/index.html index a3104bb..05de0fe 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -1,5 +1,5 @@ -Articles • ncdfgeomArticles • ncdfgeom @@ -17,7 +17,7 @@ ncdfgeom - 1.2.0 + 1.3.0 @@ -87,7 +87,7 @@

    All vignettes

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.9.

    diff --git a/docs/articles/ncdfgeom.html b/docs/articles/ncdfgeom.html index 34de8e3..e5586d1 100644 --- a/docs/articles/ncdfgeom.html +++ b/docs/articles/ncdfgeom.html @@ -6,7 +6,7 @@ NetCDF-CF Geometry and Timeseries Tools for R • ncdfgeom - + @@ -33,7 +33,7 @@ ncdfgeom - 1.2.0 + 1.3.0 @@ -55,6 +55,9 @@
  • Reading and Writing NetCDF-CF Geometry
  • +
  • + Area Weight Generation for Polygon Intersections +
  • Reading and Writing Timeseries
  • @@ -538,7 +541,7 @@

    Read Timeseries and Geometry f

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.9.

    diff --git a/docs/articles/ncdfgeom_files/figure-html/plot-1.png b/docs/articles/ncdfgeom_files/figure-html/plot-1.png index c87f77b..c150975 100644 Binary files a/docs/articles/ncdfgeom_files/figure-html/plot-1.png and b/docs/articles/ncdfgeom_files/figure-html/plot-1.png differ diff --git a/docs/articles/polygon_intersection.html b/docs/articles/polygon_intersection.html index 154fab4..d81e3a8 100644 --- a/docs/articles/polygon_intersection.html +++ b/docs/articles/polygon_intersection.html @@ -6,7 +6,7 @@ Area Weight Generation for Polygon Intersections • ncdfgeom - + @@ -33,7 +33,7 @@ ncdfgeom - 1.2.0 + 1.3.0 @@ -103,81 +103,222 @@

    Area Weight Generation for Polygon

    It is a comparison with the gdptools python package demonstration here.

    -

    See calculate_area_intersection_weights() for additional -demonstration and info.

     
     gdptools_weights <- read.csv(system.file("extdata/gdptools_prl_out.csv", package = "ncdfgeom"), 
                                  colClasses = c("character", "character", "numeric"))
     
    -gdptools_weights <- dplyr::rename(gdptools_weights, gdptools_wght = wght)
    +gdptools_weights <- dplyr::rename(gdptools_weights, gdptools_wght = wght)
     
     gage_id <- "USGS-01482100"
    -basin <- nhdplusTools::get_nldi_basin(list(featureSource = "nwissite", featureId = gage_id))
    -huc08 <- nhdplusTools::get_huc(id = na.omit(unique(gdptools_weights$huc8)), type = "huc08")
    -#> Spherical geometry (s2) switched off
    -#> Spherical geometry (s2) switched on
    -huc12 <- nhdplusTools::get_huc(id = na.omit(unique(gdptools_weights$huc12)), type = "huc12")
    -#> Spherical geometry (s2) switched off
    -#> Spherical geometry (s2) switched on
    +basin <- nhdplusTools::get_nldi_basin(list(featureSource = "nwissite", featureId = gage_id))
    +huc08 <- nhdplusTools::get_huc(id = na.omit(unique(gdptools_weights$huc8)), type = "huc08")
    +huc12 <- nhdplusTools::get_huc(id = na.omit(unique(gdptools_weights$huc12)), type = "huc12")
     
     org_par <- par(mar = c(0, 0, 0, 0))
    -plot(sf::st_as_sfc(sf::st_bbox(huc12)))
    -plot(sf::st_geometry(basin), lwd = 4, add = TRUE)
    -plot(sf::st_simplify(sf::st_geometry(huc08), dTolerance = 500), add = TRUE, lwd = 2)
    -plot(sf::st_simplify(sf::st_geometry(huc12), dTolerance = 500), add = TRUE, lwd = 0.2, border = "grey")
    -

    -
    -par(org_par)
    +plot(sf::st_as_sfc(sf::st_bbox(huc12)))
    +plot(sf::st_geometry(basin), lwd = 4, add = TRUE)
    +plot(sf::st_simplify(sf::st_geometry(huc08), dTolerance = 500), add = TRUE, lwd = 2)
    +plot(sf::st_simplify(sf::st_geometry(huc12), dTolerance = 500), add = TRUE, lwd = 0.2, border = "grey")
    +par(org_par)
     
     weights <- ncdfgeom::calculate_area_intersection_weights(
    -  x = sf::st_transform(dplyr::select(huc12, huc12), 6931),
    -  y = sf::st_transform(dplyr::select(huc08, huc8), 6931),
    +  x = sf::st_transform(dplyr::select(huc12, huc12), 6931),
    +  y = sf::st_transform(dplyr::select(huc08, huc8), 6931),
       normalize = TRUE
     )
    -#> Loading required namespace: areal
     
    -weights <- dplyr::left_join(weights, gdptools_weights, by = c("huc8", "huc12"))
    +weights <- dplyr::left_join(weights, gdptools_weights, by = c("huc8", "huc12"))

    With weights calculated, we can do a little investigation into the differences.

    -
    +
     
     weights$diff <- weights$w - weights$gdptools_wght
     
     # make sure nothing is way out of whack
     max(weights$diff, na.rm = TRUE)
    -#> [1] 0.0000009205585
     
     # ensure the weights generally sum as we would expect.
     sum(weights$gdptools_wght, na.rm = TRUE)
    -#> [1] 25
     sum(weights$w, na.rm = TRUE)
    -#> [1] 25
     length(unique(na.omit(weights$huc8)))
    -#> [1] 25
     
     # see how many NA values we have in each.
     sum(is.na(weights$w))
    -#> [1] 183
     sum(is.na(weights$gdptools_wght))
    -#> [1] 183
     
     # look at cases where gptools has NA and ncdfgeom does not
    -weights[is.na(weights$gdptools_wght),]
    -#> # A tibble: 183 × 5
    -#>    huc12        huc8      w gdptools_wght  diff
    -#>    <chr>        <chr> <dbl>         <dbl> <dbl>
    -#>  1 011000050101 NA       NA            NA    NA
    -#>  2 011000050201 NA       NA            NA    NA
    -#>  3 011000050202 NA       NA            NA    NA
    -#>  4 011000050203 NA       NA            NA    NA
    -#>  5 011000050305 NA       NA            NA    NA
    -#>  6 011000050501 NA       NA            NA    NA
    -#>  7 011000050504 NA       NA            NA    NA
    -#>  8 020200030604 NA       NA            NA    NA
    -#>  9 020200030801 NA       NA            NA    NA
    -#> 10 020200030802 NA       NA            NA    NA
    -#> # ℹ 173 more rows
    +weights[is.na(weights$gdptools_wght) & !is.na(weights$w),]
    +

    The following example illustrates the nuances between normalized and +non-normalized area weights and shows more specifically how area weight +intersection calculations can be accomplished.

    +

    The set of polygons are a contrived but useful for the sake of +demonstration.

    +
    +
    +library(dplyr)
    +library(sf)
    +library(ncdfgeom)
    +
    +g <- list(rbind(c(-1,-1), c(1,-1), c(1,1), c(-1,1), c(-1,-1)))
    +
    +blue1 = sf::st_polygon(g) * 0.8
    +blue2 = blue1 + c(1, 2)
    +blue3 = blue1 * 1.2 + c(-1, 2)
    +
    +pink1 = sf::st_polygon(g)
    +pink2 = pink1 + 2
    +pink3 = pink1 + c(-0.2, 2)
    +pink4 = pink1 + c(2.2, 0)
    +
    +blue = sf::st_sfc(blue1,blue2,blue3)
    +
    +pink = sf::st_sfc(pink1, pink2, pink3, pink4)
    +
    +plot(c(blue,pink), border = NA)
    +plot(blue, border = "#648fff", add = TRUE)
    +plot(pink, border = "#dc267f", add = TRUE)
    +
    +blue <- sf::st_sf(blue, data.frame(idblue = c(1, 2, 3)))
    +pink <- sf::st_sf(pink, data.frame(idpink = c(7, 8, 9, 10)))
    +
    +text(sapply(sf::st_geometry(blue), \(x) mean(x[[1]][,1]) + 0.4),
    +     sapply(sf::st_geometry(blue), \(x) mean(x[[1]][,2]) + 0.3),
    +     blue$idblue, col = "#648fff")
    +
    +text(sapply(sf::st_geometry(pink), \(x) mean(x[[1]][,1]) + 0.4),
    +     sapply(sf::st_geometry(pink), \(x) mean(x[[1]][,2])),
    +     pink$idpink, col = "#dc267f")
    +
    +sf::st_agr(blue) <- sf::st_agr(pink) <- "constant"
    +sf::st_crs(pink) <- sf::st_crs(blue) <- sf::st_crs(5070)
    +
    +
    +(blue_pink_norm_false <- 
    +calculate_area_intersection_weights(blue, pink, normalize = FALSE))
    +

    NOTE: normalize = FALSE so weights sum to 1 per source polygon only +when a source polygon is fully covered by the target. The +non-intersecting portion is not included.

    +

    The following breaks down how to use these weights for one source +polygon.

    +
    +blue$val = c(30, 10, 20)
    +blue$area <- as.numeric(sf::st_area(blue))
    +
    +(result <- st_drop_geometry(blue) |>
    +  left_join(blue_pink_norm_false, by = "idblue"))
    +

    To calculate the value for pink-9, we would do:

    +
    +
    +((10 * 0.375 * 2.56) + (20 * 0.604167 * 3.6864)) / ((0.375 * 2.56) + (0.604167 * 3.6864))
    +

    This is saying that 0.375 of blue-3 covers pink-9 and 0.6 of blue-2 +covers pink-9. Since we are using area as the weighting method, we +multiply the fraction of each source polygon by its area and the value +we want to create an area weight for. We sum the contributions from +blue-2 and blue-3 to pink-9 and divide by the sum of the combined area +weights.

    +

    Note that because there is no contribution to 9 over some parts of +the polygon, that missing area does not appear. The intersecting areas +are 0.96 and 2.23 meaning that we are missing

    +

    4 - 0.96 - 2.23 = 0.81

    +

    and could rewrite the value for pink-9 as:

    +
    +((10 * 0.375 * 2.56) + (20 * 0.604167 * 3.6864)) + (NA * 1 * 0.81) / 
    +  ((1 * 0.81) + (0.375 * 2.56) + (0.604167 * 3.6864))
    +

    Which evaluates to NA. This is why for this operation we usually drop +NA terms!

    +

    In practice, the above can be accomplished with:

    +
    +(result <- result |>
    +  group_by(idpink) |> # group so we get one row per target
    +  # now we calculate the value for each `pink` with fraction of the area of each
    +  # polygon in `blue` per polygon in `pink` with an equation like this:
    +  summarize(
    +    new_val = sum( (val * w * area) ) / sum(w * area)))
    +

    Now let’s do the same thing but with +normalize = FALSE.

    +
    +
    +(blue_pink_norm_true <-
    +calculate_area_intersection_weights(select(blue, idblue), pink, normalize = TRUE))
    +

    NOTE: normalize = TRUE so weights sum to 1 per target polygon. +Non-overlap is ignored as if it does not exist.

    +

    The following breaks down how to use these weights for one source +polygon.

    +
    +(result <- st_drop_geometry(blue) |>
    +    left_join(blue_pink_norm_true, by = "idblue"))
    +

    To calculate the value for pink-9, we would do:

    +
    +((10 * 0.24) + (20 * 0.5568)) / (0.24 + (0.5568))
    +

    This is saying that the portion of pink-9 that should get the value +from blue-2 is 0.3 and the portion of pink-9 that should get the value +from blue-3 is 0.7. In this form, our weights are transformed to +includethe relative area of the source polygons.

    +

    As shown above as well, the calculation can be accomplished with:

    +
    +(result <- result |>
    +    group_by(idpink) |> # group so we get one row per target
    +    # now we calculate the value for each `pink` with fraction of the area of each
    +    # polygon in `blue` per polygon in `pink` with an equation like this:
    +    summarize(
    +      new_val = sum( (val * w) ) / sum(w)))
    +

    We can look at a more typical arrangement of polygons and look at +this a different way.

    +

    Let’s also look at the values.

    +
    +# say we have data from `blue` that we want sampled to `pink`.
    +# this gives the percent of each `blue` that intersects each `pink`
    +
    +(blue_pink <- calculate_area_intersection_weights(
    +  select(blue, idblue), select(pink, idpink), normalize = FALSE))
    +
    +# NOTE: `w` sums to 1 per `blue` in all cases
    +
    +summarize(group_by(blue_pink, idblue), w = sum(w))
    +
    +# Since normalize is false, we apply weights like:
    +st_drop_geometry(blue) |>
    +  left_join(blue_pink, by = "idblue") |>
    +  mutate(blue_areasqkm = 1.5 ^ 2) |> # add area of each polygon in `blue`
    +  group_by(idpink) |> # group so we get one row per `pink`
    +  # now we calculate the value for each b with fraction of the area of each
    +  # polygon in `blue` per polygon in `pink` with an equation like this:
    +  summarize(
    +    new_val = sum( (val * w * blue_areasqkm) ) / sum(w * blue_areasqkm))
    +
    +# NOTE: `w` is the fraction of the polygon in `blue`. We need to multiply `w` by the
    +# unique area of the polygon it is associated with to get the weighted mean weight.
    +
    +# we can go in reverse if we had data from `pink` that we want sampled to `blue`
    +
    +(pink_blue <- calculate_area_intersection_weights(
    +  select(pink, idpink), select(blue, idblue), normalize = FALSE))
    +
    +# NOTE: `w` sums to 1 per `pink` (source) only where `pink` is fully covered by `blue` (target).
    +
    +summarize(group_by(pink_blue, idpink), w = sum(w))
    +
    +# Now let's look at what happens if we set normalize = TRUE. Here we
    +# get `blue` as source and `pink` as target but normalize the weights so
    +# the area of `blue` is built into `w`.
    +
    +(blue_pink <- calculate_area_intersection_weights(
    +  select(blue, idblue), select(pink, idpink), normalize = TRUE))
    +
    +# NOTE: if we summarize by `pink` (target) `w` sums to 1 only where there is full overlap.
    +
    +summarize(group_by(blue_pink, idpink), w = sum(w))
    +
    +# Since normalize is false, we apply weights like:
    +st_drop_geometry(blue) |>
    +  left_join(blue_pink, by = "idblue") |>
    +  group_by(idpink) |> # group so we get one row per `pink`
    +  # now we weight by the percent of each polygon in `pink` per polygon in `blue`
    +  summarize(new_val = sum( (val * w) ) / sum( w ))
    +
    +# NOTE: `w` is the fraction of the polygon from `blue` overlapping the polygon from `pink`.
    +# The area of `blue` is built into the weight so we just sum the weith times value oer polygon.
    @@ -55,6 +55,9 @@
  • Reading and Writing NetCDF-CF Geometry
  • +
  • + Area Weight Generation for Polygon Intersections +
  • Reading and Writing Timeseries
  • @@ -163,7 +166,7 @@

    Reading and Writing Timeseries

    timeseries ID (which is stored as a string).
    -ncmeta::nc_dims(nc_file)
    +ncmeta::nc_dims(nc_file)
     #> # A tibble: 3 × 4
     #>      id name               length unlim
     #>   <int> <chr>               <dbl> <lgl>
    @@ -173,7 +176,7 @@ 

    Reading and Writing Timeseries

    The file has variables for latitude, longitude, altitude, timeseries IDs, and a data variable.

    -ncmeta::nc_vars(nc_file)
    +ncmeta::nc_vars(nc_file)
     #> # A tibble: 6 × 5
     #>      id name                                        type      ndims natts
     #>   <int> <chr>                                       <chr>     <int> <int>
    @@ -186,7 +189,7 @@ 

    Reading and Writing Timeseries

    The primary dimensions in the file are of length, number of time steps and number of time series.

    -ncmeta::nc_dims(nc_file)
    +ncmeta::nc_dims(nc_file)
     #> # A tibble: 3 × 4
     #>      id name               length unlim
     #>   <int> <chr>               <dbl> <lgl>
    @@ -289,7 +292,7 @@ 

    Reading and Writing Timeseries

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.9.

    diff --git a/docs/authors.html b/docs/authors.html index eda525b..97b12c3 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -1,5 +1,5 @@ -Authors and Citation • ncdfgeomAuthors and Citation • ncdfgeom @@ -17,7 +17,7 @@ ncdfgeom - 1.2.0 + 1.3.0
    @@ -63,7 +63,7 @@
    @@ -107,7 +107,7 @@

    Citation

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.9.

    diff --git a/docs/index.html b/docs/index.html index 88a2c6d..b03fa50 100644 --- a/docs/index.html +++ b/docs/index.html @@ -6,7 +6,7 @@ NetCDF Geometry and Time Series • ncdfgeom - + @@ -33,7 +33,7 @@ ncdfgeom - 1.2.0 + 1.3.0
    @@ -104,7 +104,7 @@

    Installationinstall.packages("ncdfgeom")

    For the latest development version:

    install.packages("remotes")
    -remotes::install_github("DOI-USGS/ncdfgeom")
    +remotes::install_github("DOI-USGS/ncdfgeom")

    Contributing @@ -174,7 +174,7 @@

    Developers

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.9.

    diff --git a/docs/news/index.html b/docs/news/index.html index 3d55034..7e90730 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -1,5 +1,5 @@ -Changelog • ncdfgeomChangelog • ncdfgeom @@ -17,7 +17,7 @@ ncdfgeom - 1.2.0 + 1.3.0
    @@ -90,7 +90,7 @@