Skip to content

Commit

Permalink
Translate data sqids (#34)
Browse files Browse the repository at this point in the history
* adding favicons

* Updated example ids and test snapshots (#32)

* Created placeholder function for parsing data sqids

* Added parse_sqids_filters()

* Little bit of lintr driven clean-up of parse_sqid code

* Sqid parsing now handles filters and indicators (and works around indicators and filters potentially having the same sqid)

* Changed my mind on how to structure the sqid replacement code, mulling it over with a cup of tea. Think this works cleaner.

* Updated post_dataset and tests to work with sqid parsing

* Added parsing of location sqids

* Added description for geography parsing

* Renamed parse_sqids_geographies to parse_sqid_locations just to make it a bit more accurate and clear

* Basic fixes from pull request comments

* Removed duplicate section in pkgdown

* Minor inconsistency fix to debug / verbose descriptions

---------

Co-authored-by: SELBY <[email protected]>
  • Loading branch information
rmbielby and SELBY authored Oct 21, 2024
1 parent aee9725 commit 6effbec
Show file tree
Hide file tree
Showing 34 changed files with 383 additions and 62 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,6 @@ Imports:
jsonlite,
dplyr,
stringr,
rlang,
magrittr
data.table,
magrittr,
rlang
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export(api_url)
export(api_url_pages)
export(api_url_query)
export(convert_api_filter_type)
export(example_data_raw)
export(example_geography_query)
export(example_id)
export(example_json_query)
Expand All @@ -18,6 +19,9 @@ export(parse_meta_filter_columns)
export(parse_meta_filter_item_ids)
export(parse_meta_location_ids)
export(parse_meta_time_periods)
export(parse_sqids_filters)
export(parse_sqids_indicators)
export(parse_sqids_locations)
export(parse_tojson_filter)
export(parse_tojson_filter_eq)
export(parse_tojson_filter_in)
Expand All @@ -34,3 +38,4 @@ export(validate_ees_id)
export(validate_page_size)
export(validate_time_periods)
export(warning_max_pages)
importFrom(data.table,":=")
2 changes: 1 addition & 1 deletion R/api_url.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#' @param page Page number of query results to return
#' @param api_version EES API version
#' @param environment EES environment to connect to: "dev", "test", "preprod" or "prod"
#' @param verbose Run with additional contextual messaging, logical, default = FALSE
#' @param verbose Run with additional contextual messaging. Logical, default = FALSE
#' @return A string containing the URL for connecting to the EES API
#' @export
#'
Expand Down
7 changes: 7 additions & 0 deletions R/eesyapi-package.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#' @keywords internal
"_PACKAGE"

## usethis namespace: start
#' @importFrom data.table :=
## usethis namespace: end
NULL
35 changes: 32 additions & 3 deletions R/examples.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#' Example ID
#'
#' @description
#' This function returns examples of working IDs that can be used with the API.
#'
Expand Down Expand Up @@ -94,6 +93,36 @@ example_id <- function(
}
}

#' Example raw data
#'
#' @description
#' Fetch a small sample of raw (un-parsed) results from the API. This is
#' primarily a development / testing aid for the sqid parsing functions, but
#' also serves as a worked example for end users who would rather handle the
#' sqid parsing themselves.
#'
#' @inheritParams example_id
#' @param size Number of rows to return (max = 1000)
#'
#' @return Nested list form of example data from the API
#' @export
#'
#' @examples
#' example_data_raw()
example_data_raw <- function(
    group = "attendance",
    size = 32) {
  # Build the GET URL for the example data set belonging to the chosen group
  query_url <- eesyapi::api_url(
    "get-data",
    dataset_id = example_id(group = group),
    indicators = example_id("indicator", group = group),
    page = 1, page_size = size
  )
  # Retrieve the response and extract just the raw "results" element,
  # leaving sqids untranslated
  response <- httr::GET(query_url)
  parsed <- jsonlite::fromJSON(httr::content(response, "text"))
  magrittr::use_series(parsed, "results")
}

#' Create an example json query string
#' @description
#' Create an example json query string for use in examples and tests
Expand Down Expand Up @@ -135,13 +164,13 @@ example_geography_query <- function(level = "nat_yorks") {
return_level = c("NAT", "REG"),
search_level = c("NAT", "REG"),
identifier_type = c("code", "code"),
identifier = c("E92000001", "E12000002")
identifier = c("E92000001", "E12000003")
),
nat_yorks_yorkslas = data.frame(
return_level = c("NAT", "REG", "LA"),
search_level = c("NAT", "REG", "REG"),
identifier_type = c("code", "code", "code"),
identifier = c("E92000001", "E12000004", "E12000004")
identifier = c("E92000001", "E12000003", "E12000003")
)
)
example_geography_queries |>
Expand Down
2 changes: 1 addition & 1 deletion R/get_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ get_dataset <- function(
message(paste("Total number of pages: ", response_json$paging$totalPages))
}
dfresults <- response_json$results |>
eesyapi::parse_api_dataset(verbose = verbose)
eesyapi::parse_api_dataset(dataset_id = dataset_id, verbose = verbose)
# Unless the user has requested a specific page, then assume they'd like all pages collated and
# recursively run the query.
if (is.null(page)) {
Expand Down
1 change: 1 addition & 0 deletions R/get_meta.R
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ parse_meta_location_ids <- function(api_meta_locations,
location_items_i <- api_meta_locations$options |>
magrittr::extract2(i) |>
dplyr::mutate(
geographic_level_code = api_meta_locations$level$code[i],
geographic_level = api_meta_locations$level$label[i]
) |>
dplyr::rename(item_id = "id")
Expand Down
31 changes: 17 additions & 14 deletions R/parse_api_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,19 @@
#' parse_api_dataset()
#'
#' @param api_data_result A json data result list as returned from the API
#' @param dataset_id ID of data set to be connected to.
#' @param verbose Run in verbose mode, logical, default = FALSE
#' @inheritParams api_url
#'
#' @return Data frame containing API data results
#' @export
#'
#' @examples
#' api_url(
#' "get-data",
#' dataset_id = example_id(), indicators = example_id("indicator"), page_size = 10
#' ) |>
#' httr::GET() |>
#' httr::content("text") |>
#' jsonlite::fromJSON() |>
#' parse_api_dataset()
#' example_data_raw(group = "attendance") |>
#' parse_api_dataset(example_id(group = "attendance"))
parse_api_dataset <- function(
api_data_result,
dataset_id = NULL,
dataset_id,
dataset_version = NULL,
api_version = NULL,
verbose = FALSE) {
if (!is.null(dataset_id)) {
eesyapi::validate_ees_id(dataset_id, level = "dataset")
Expand All @@ -41,12 +36,20 @@ parse_api_dataset <- function(
print(names(api_data_result$locations))
print(names(api_data_result$filters))
}
meta <- eesyapi::get_meta(
dataset_id,
dataset_version = dataset_version,
api_version = api_version
)
dplyr::bind_cols(
api_data_result$timePeriod,
data.frame(geographic_level = api_data_result$geographicLevel),
api_data_result$locations,
api_data_result$filters,
api_data_result$values
api_data_result$locations |>
eesyapi::parse_sqids_locations(meta),
api_data_result$filters |>
eesyapi::parse_sqids_filters(meta),
api_data_result$values |>
eesyapi::parse_sqids_indicators(meta),
)
# Next aim here is to pull in the meta data automatically at this point to translate
# all the API codes...
Expand Down
120 changes: 120 additions & 0 deletions R/parse_sqids.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#' Parse location sqids
#'
#' @description
#' The API uses unique IDs (sqids) to identify each location in a data set. This function parses
#' those into the corresponding location codes and names based on the meta data stored on the API
#' for the data set.
#'
#' @inheritParams parse_sqids_filters
#' @param locations A set of location columns as taken from a data set downloaded from the API
#'
#' @return Data frame of parsed geography information
#' @export
#'
#' @examples
#' example_data_raw() |>
#' magrittr::use_series("locations") |>
#' parse_sqids_locations(get_meta(example_id(group = "attendance")))
parse_sqids_locations <- function(locations, meta, verbose = FALSE) {
  # Restrict the meta-data location lookup to only the geographic levels
  # actually present as columns in the downloaded data
  lookup <- meta |>
    magrittr::use_series("locations") |>
    dplyr::filter(!!rlang::sym("geographic_level_code") %in% names(locations)) |>
    dplyr::rename(name = "label")
  # Translate one geographic-level column at a time: temporarily rename the
  # sqid column to "item_id", join on the matching slice of the lookup, then
  # drop the sqid column. Joined columns are prefixed with the lower-cased
  # level code (e.g. "nat_", "reg_") to keep them distinct across levels.
  for (level in names(locations)) {
    locations <- locations |>
      dplyr::rename("item_id" = !!rlang::sym(level)) |>
      dplyr::left_join(
        lookup |>
          dplyr::filter(!!rlang::sym("geographic_level_code") == level) |>
          dplyr::select(-dplyr::all_of(c("geographic_level_code", "geographic_level"))) |>
          dplyr::rename_with(~ paste0(tolower(level), "_", .x), !dplyr::matches("item_id")),
        by = dplyr::join_by("item_id")
      ) |>
      dplyr::select(-"item_id")
  }
  # Drop any joined columns that came back entirely NA (levels with no
  # matching meta entries) before returning
  return(
    locations |>
      dplyr::select(
        dplyr::where(
          ~ !all(is.na(.x))
        )
      )
  )
}

#' Parse IDs in a set of filters
#'
#' @description
#' The API uses unique IDs (sqids) to identify each filter column and its contents (filter items).
#' This function parses those into the data creators' id and item labels based on the meta data
#' stored on the API for the data set.
#'
#' @param filters A set of filter item columns as taken from a data set downloaded from the API
#' @param meta Meta data for the data set as provided by `get_meta()`
#' @param verbose Run in verbose mode with debugging messages
#'
#' @return Data frame
#' @export
#'
#' @examples
#' example_data_raw() |>
#' magrittr::use_series("filters") |>
#' parse_sqids_filters(get_meta(example_id(group = "attendance")))
parse_sqids_filters <- function(filters, meta, verbose = FALSE) {
  # Find the filter-column sqids present in both the data and the meta data
  filter_ids <- meta |>
    magrittr::use_series("filter_columns") |>
    dplyr::filter(!!rlang::sym("col_id") %in% colnames(filters)) |>
    dplyr::pull("col_id")
  if (verbose) {
    # Use message() (not print) so debug output is consistent with the rest
    # of the package and goes to stderr
    message("Matched filter column sqids: ", paste(filter_ids, collapse = ", "))
  }
  # For each matched column: recover the creator's column name, build a
  # sqid -> item-label lookup, join it on and drop the raw sqid column
  for (column_sqid in filter_ids) {
    col_name <- meta |>
      magrittr::use_series("filter_columns") |>
      dplyr::filter(!!rlang::sym("col_id") == column_sqid) |>
      dplyr::pull("col_name")
    if (verbose) {
      message("Matched ", column_sqid, " to ", col_name)
    }
    lookup <- meta |>
      magrittr::use_series("filter_items") |>
      dplyr::filter(!!rlang::sym("col_id") == column_sqid) |>
      dplyr::select("item_label", "item_id") |>
      dplyr::rename(
        !!rlang::sym(col_name) := "item_label",
        !!rlang::sym(column_sqid) := "item_id"
      )
    filters <- filters |>
      dplyr::left_join(lookup, by = column_sqid) |>
      dplyr::select(-dplyr::all_of(column_sqid))
  }
  return(filters)
}

#' Parse IDs in a set of indicators
#'
#' @description
#' The API uses unique IDs (sqids) to identify each indicator column. This function parses those
#' into the data creators' column names based on the meta data stored on the API for the data set.
#'
#' @inheritParams parse_sqids_filters
#' @param indicators A set of indicator columns as taken from a data set downloaded from the API
#'
#' @return Data frame
#' @export
#'
#' @examples
#' example_data_raw(group = "attendance") |>
#' magrittr::use_series("values") |>
#' parse_sqids_indicators(get_meta(example_id(group = "attendance")))
parse_sqids_indicators <- function(indicators, meta, verbose = FALSE) {
  # Keep only the meta rows whose sqid appears as a column in the data
  matched_meta <- dplyr::filter(
    magrittr::use_series(meta, "indicators"),
    !!rlang::sym("col_id") %in% colnames(indicators)
  )
  # Build a rename map of the form c(new_name = old_sqid) for dplyr::rename()
  rename_map <- stats::setNames(
    dplyr::pull(matched_meta, "col_id"),
    dplyr::pull(matched_meta, "col_name")
  )
  dplyr::rename(indicators, dplyr::all_of(rename_map))
}
6 changes: 4 additions & 2 deletions R/post_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,9 @@ post_dataset <- function(
json_body <- readLines(json_query) |>
paste0(collapse = "\n")
} else {
message("Parsing query options")
if (verbose) {
message("Parsing query options")
}
json_body <- json_query
}
} else {
Expand Down Expand Up @@ -119,7 +121,7 @@ post_dataset <- function(
message(paste("Total number of pages: ", response_json$paging$totalPages))
}
dfresults <- response_json$results |>
eesyapi::parse_api_dataset(verbose = verbose)
eesyapi::parse_api_dataset(dataset_id, verbose = verbose)
# Unless the user has requested a specific page, then assume they'd like all pages collated and
# recursively run the query.
if (is.null(page) && is.null(json_query)) {
Expand Down
2 changes: 1 addition & 1 deletion R/post_dataset_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#'
#' @inheritParams api_url
#' @inheritParams parse_tojson_geographies
#' @param debug Run POST query in debug mode: logic, default: FALSE
#' @param debug Run POST query in debug mode. Logical, default = FALSE
#'
#' @return String containing json query body for use with http POST request
#' @export
Expand Down
7 changes: 4 additions & 3 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,12 @@ reference:
- get_dataset
- post_dataset
- parse_api_dataset
- starts_with("parse_sqid")

- title: Examples
desc: Functions to create useful example cases for tests and code examples
- title: Generate example IDs and data
desc: These functions are used widely to create working example code and tests
contents:
- starts_with("example_")
- starts_with("example")

- title: Validation functions
desc: These functions are used across the package to validate elements being passed as part of an API url or query.
Expand Down
2 changes: 1 addition & 1 deletion man/api_url.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions man/eesyapi-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 6effbec

Please sign in to comment.