diff --git a/DESCRIPTION b/DESCRIPTION index 52263ef..ff764c6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: eesyapi Title: EES-y API -Version: 0.2.1 +Version: 0.3.0 Authors@R: c( person("Rich", "Bielby", , "richard.bielby@education.gov.uk", role = c("aut", "cre"), @@ -21,4 +21,6 @@ Imports: httr, jsonlite, dplyr, - stringr + stringr, + rlang, + magrittr diff --git a/NAMESPACE b/NAMESPACE index 4306f15..2422fce 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,7 +3,10 @@ export(api_url) export(api_url_pages) export(api_url_query) +export(convert_api_filter_type) +export(example_geography_query) export(example_id) +export(example_json_query) export(get_data_catalogue) export(get_dataset) export(get_meta) @@ -11,12 +14,23 @@ export(get_meta_response) export(get_publications) export(http_request_error) export(parse_api_dataset) -export(parse_filter_in) export(parse_meta_filter_columns) export(parse_meta_filter_item_ids) export(parse_meta_location_ids) export(parse_meta_time_periods) +export(parse_tojson_filter) +export(parse_tojson_filter_eq) +export(parse_tojson_filter_in) +export(parse_tojson_geographies) +export(parse_tojson_indicators) +export(parse_tojson_location) +export(parse_tojson_params) +export(parse_tojson_time_periods) +export(parse_tourl_filter_in) +export(post_dataset) export(query_dataset) +export(validate_ees_filter_type) export(validate_ees_id) export(validate_page_size) +export(validate_time_periods) export(warning_max_pages) diff --git a/NEWS.md b/NEWS.md index dd75304..2c28d61 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,17 @@ +# eesyapi 0.3.0 + +* Created capacity to query data using POST: + - `query_dataset()`: Now defaults to using POST instead of GET + - `post_dataset()`: Sends a query of a data set, either using a json file, json string or + parameters +* Updated how `example_id()` works to allow more complex examples + # eesyapi 0.2.1 -* Updated `get_meta()` to work with new API meta output (addition of id alongside 
col_name and label) +* Created initial `query_dataset()` function that queries a data set using `get_dataset()` +* Created `get_dataset()` function that queries a data set using GET and URL parameters +* Updated `get_meta()` to work with new API meta output (addition of id alongside col_name and +label) * Removed redundant function: `parse_meta_filter_columns()` * Hex logo added for documentation diff --git a/R/api_url.R b/R/api_url.R index c7f50b8..d60a2d4 100644 --- a/R/api_url.R +++ b/R/api_url.R @@ -24,8 +24,7 @@ #' @param page Page number of query results to return #' @param api_version EES API version #' @param environment EES environment to connect to: "dev", "test", "preprod" or "prod" -#' @param verbose Add extra contextual information whilst running -#' +#' @param verbose Run with additional contextual messaging, logical, default = FALSE #' @return A string containing the URL for connecting to the EES API #' @export #' diff --git a/R/api_url_query.R b/R/api_url_query.R index e1a2c8c..6c879a6 100644 --- a/R/api_url_query.R +++ b/R/api_url_query.R @@ -27,17 +27,17 @@ api_url_query <- function( eesyapi::validate_ees_id(indicators, level = "indicator") # Create the appropriate query strings for each level provided if (!is.null(time_periods)) { - query_time_periods <- eesyapi::parse_filter_in(time_periods, "time_periods") + query_time_periods <- eesyapi::parse_tourl_filter_in(time_periods, "time_periods") } if (!is.null(geographic_levels)) { - query_geographic_levels <- eesyapi::parse_filter_in( + query_geographic_levels <- eesyapi::parse_tourl_filter_in( geographic_levels, - type = "geographic_levels" + filter_type = "geographic_levels" ) } if (!is.null(locations)) { eesyapi::validate_ees_id(locations, level = "location") - query_locations <- eesyapi::parse_filter_in(locations, type = "locations") + query_locations <- eesyapi::parse_tourl_filter_in(locations, filter_type = "locations") } if (!is.null(filter_items)) { # Note the idea below was to differentiate 
the logic between AND / OR based on whether @@ -50,11 +50,14 @@ api_url_query <- function( for (filter_set in filter_items) { query_filter_items <- paste0( query_filter_items, - eesyapi::parse_filter_in(filter_set, type = "filter_items") + eesyapi::parse_tourl_filter_in(filter_set, filter_type = "filter_items") ) } } else { - query_filter_items <- eesyapi::parse_filter_in(filter_items, type = "filter_items") + query_filter_items <- eesyapi::parse_tourl_filter_in( + filter_items, + filter_type = "filter_items" + ) } } query_indicators <- paste0( diff --git a/R/api_url_query_utils.R b/R/api_url_query_utils.R index 86ed3cc..18afaa5 100644 --- a/R/api_url_query_utils.R +++ b/R/api_url_query_utils.R @@ -5,25 +5,21 @@ #' use with querying a data set via GET. #' #' @param items items to be included in the "in" statement -#' @param type type of filter items being queried: "time_periods", "geographic_levels", +#' @param filter_type type of filter being queried: "time_periods", "geographic_levels", #' "locations" or "filter_items" #' #' @return Query string for use in URL based API queries #' @export #' #' @examples -#' parse_filter_in(c("2024|W11", "2024|W12"), type = "time_periods") -parse_filter_in <- function( +#' parse_tourl_filter_in(c("2024|W11", "2024|W12"), filter_type = "time_periods") +parse_tourl_filter_in <- function( items, - type) { - if (!(type %in% c("time_periods", "geographic_levels", "locations", "filter_items"))) { - stop("type keyword should be one of time_periods, geographic_levels, locations or filter_items") - } - type_string <- type |> - stringr::str_replace("_item", "") - type_string <- gsub("_(\\w?)", "\\U\\1", type_string, perl = TRUE) + filter_type) { + eesyapi::validate_ees_filter_type(filter_type) + type_string <- eesyapi::convert_api_filter_type(filter_type) if (!is.null(items)) { - if (type %in% c("time_period", "locations")) { + if (filter_type %in% c("time_periods", "locations")) { items <- gsub("\\|", "%7C", items) } paste0( diff --git 
a/R/convert_api_filter_type.R b/R/convert_api_filter_type.R new file mode 100644 index 0000000..83abb5b --- /dev/null +++ b/R/convert_api_filter_type.R @@ -0,0 +1,23 @@ +#' Convert filter type to API filter type +#' +#' @description +#' The API uses a slightly different naming convention for the different types of +#' filters to what is used by analysts within data files. The function just converts +#' from the file versions to the API versions. +#' +#' @inheritParams parse_tourl_filter_in +#' +#' @return String containing API friendly filter type descriptor +#' @export +#' +#' @examples +#' convert_api_filter_type("filter_items") +#' convert_api_filter_type("geographic_levels") +#' convert_api_filter_type("locations") +#' convert_api_filter_type("time_periods") +convert_api_filter_type <- function(filter_type) { + eesyapi::validate_ees_filter_type(filter_type) + filter_type <- filter_type |> + stringr::str_replace("_item", "") + gsub("_(\\w?)", "\\U\\1", filter_type, perl = TRUE) +} diff --git a/R/example_id.R b/R/example_id.R deleted file mode 100644 index 523a396..0000000 --- a/R/example_id.R +++ /dev/null @@ -1,98 +0,0 @@ -#' Example ID -#' @description -#' This function returns examples of working IDs that can be used with the API. -#' -#' @param level Level of ID example to return: "publication" or "data set" -#' @param environment Environment to return a working example for -#' @param group Choose the publication group of examples to use. Can be "attendance". 
-#' -#' @return String containing an example ID present in the API -#' @export -#' -#' @examples -#' example_id() -example_id <- function( - level = "dataset", - environment = "dev", - group = "public-api-testing") { - example_ids <- data.frame( - levels = c( - "publication", - "dataset", - "location_id", - "location_code", - "filter_item", - "indicator", - "publication", - "dataset", - "location_id", - "location_code", - "filter_item", - "indicator" - ), - environments = c( - "dev", - "dev", - "dev", - "dev", - "dev", - "dev", - "dev", - "dev", - "dev", - "dev", - "dev", - "dev" - ), - example_group = c( - "attendance", - "attendance", - "attendance", - "attendance", - "attendance", - "attendance", - "public-api-testing", - "public-api-testing", - "public-api-testing", - "public-api-testing", - "public-api-testing", - "public-api-testing" - ), - examples = c( - "b6d9ed96-be68-4791-abc3-08dcaba68c04", - "7c0e9201-c7c0-ff73-bee4-304e731ec0e6", - "NAT|id|dP0Zw", - "NAT|code|E92000001", - "hl2Gy", - "bqZtT", - "d823e4df-626f-4450-9b21-08dc8b95fc02", - "830f9201-9e11-ad75-8dcd-d2efe2834457", - "LA|id|ml79K", - "NAT|code|E92000001", - "HsQzL", - "h8fyW" - ) - ) - if (level == "all") { - return(example_ids) - } else { - if (!(level %in% example_ids$levels)) { - stop( - paste0( - "Non-valid element level received by validate_id.\n", - "Should be one of:\n", - paste(example_ids$levels, collapse = "\", \"") - ) - ) - } - return( - example_ids |> - dplyr::filter( - example_ids$levels == level, - example_ids$environments == environment, - example_ids$example_group == group - ) |> - dplyr::pull("examples") - ) - } -} diff --git a/R/examples.R b/R/examples.R new file mode 100644 index 0000000..ac82b9c --- /dev/null +++ b/R/examples.R @@ -0,0 +1,149 @@ +#' Example ID +#' +#' @description +#' This function returns examples of working IDs that can be used with the API. 
+#' +#' @param level Level of ID example to return: "publication" or "data set" +#' @param environment Environment to return a working example for +#' @param group Choose the publication group of examples to use. Can be "attendance". +#' +#' @return String containing an example ID present in the API +#' @export +#' +#' @examples +#' example_id() +example_id <- function( + level = "dataset", + environment = "dev", + group = "public-api-testing") { + example_id_list <- list( + attendance = list( + dev = list( + publication = "b6d9ed96-be68-4791-abc3-08dcaba68c04", + dataset = "7c0e9201-c7c0-ff73-bee4-304e731ec0e6", + time_period = "2024|W23", + time_periods = c("2024|W21", "2024|W23"), + location_id = "NAT|id|dP0Zw", + location_ids = c("NAT|id|dP0Zw", "REG|id|rg3Nj"), + location_code = "NAT|code|E92000001", + filter = "4kdUZ", + filter_item = "5UNdi", + filter_items_long = list( + attendance_status = c("pmRSo", "7SdXo"), + attendance_type = c("CvuId", "6AXrf", "0k3T5", "YdkHK"), + education_phase = c("ThDPJ", "crH31"), + day_number = c("uLQo4"), + reason = c("bBrtT") + ), + filter_items_short = list( + attendance_status = c("pmRSo"), + attendance_type = c("CvuId", "6AXrf"), + education_phase = c("ThDPJ", "crH31"), + day_number = c("uLQo4"), + reason = c("bBrtT") + ), + indicator = "bqZtT" + ) + ), + `public-api-testing` = list( + dev = list( + publication = "d823e4df-626f-4450-9b21-08dc8b95fc02", + dataset = "830f9201-9e11-ad75-8dcd-d2efe2834457", + location_id = "LA|id|ml79K", + location_code = "NAT|code|E92000001", + filter = "01tT5", + filter_item = "wEZcb", + indicator = "PbNeb" + ) + ) + ) + if (!(group %in% names(example_id_list))) { + stop(paste0("Chosen group (", group, ") not found in examples list.")) + } + if (!(environment %in% c("dev"))) { + stop(paste0("Chosen environment (", environment, ") should be one of: \"dev\".")) + } + + group_examples <- example_id_list |> + magrittr::extract2(group) |> + magrittr::extract2(environment) + + if (any(level == 
"all")) { + return(group_examples) + } else { + if (any(!(level %in% names(group_examples)))) { + stop( + paste0( + "Non-valid element level received by validate_id.\n", + "Should be one of:\n\"", + paste(names(group_examples), collapse = "\", \""), + "\"." + ) + ) + } + return( + if (length(level) > 1) { + group_examples |> + magrittr::extract(level) |> + unlist() + } else { + group_examples |> + magrittr::extract2(level) + } + ) + } +} + +#' Create an example json query string +#' @description +#' Create an example json query string for use in examples and tests +#' +#' @return String containing an example json query +#' @export +#' +#' @examples +#' example_json_query() |> cat() +example_json_query <- function() { + eesyapi::parse_tojson_params( + indicators = example_id("indicator", group = "attendance"), + time_periods = "2024|W23", + geographies = c("NAT|id|dP0Zw", "REG|id|rg3Nj"), + filter_items = list( + attendance_status = c("pmRSo"), + attendance_type = c("CvuId", "6AXrf"), + education_phase = c("ThDPJ", "crH31"), + day_number = c("uLQo4"), + reason = c("bBrtT") + ) + ) +} + +#' Create an example geography-query data frame +#' +#' @param level Query level within available options, can be one of \"nat_yorks\" or +#' \"nat_yorks_yorkslas\" +#' +#' @return Data frame containing an example geography query +#' @export +#' +#' @examples +#' example_geography_query() +example_geography_query <- function(level = "nat_yorks") { + example_geography_queries <- list( + nat_yorks = + data.frame( + return_level = c("NAT", "REG"), + search_level = c("NAT", "REG"), + identifier_type = c("code", "code"), + identifier = c("E92000001", "E12000002") + ), + nat_yorks_yorkslas = data.frame( + return_level = c("NAT", "REG", "LA"), + search_level = c("NAT", "REG", "REG"), + identifier_type = c("code", "code", "code"), + identifier = c("E92000001", "E12000004", "E12000004") + ) + ) + example_geography_queries |> + magrittr::extract2(level) +} diff --git a/R/get_dataset.R 
b/R/get_dataset.R index 919d585..c2e0094 100644 --- a/R/get_dataset.R +++ b/R/get_dataset.R @@ -18,7 +18,7 @@ #' @examples #' get_dataset( #' example_id(), -#' geographic_levels = "NAT", +#' geographic_levels = c("SCH"), #' filter_items = example_id("filter_item"), #' indicators = example_id("indicator") #' ) diff --git a/R/get_meta.R b/R/get_meta.R index d365087..b4986fe 100644 --- a/R/get_meta.R +++ b/R/get_meta.R @@ -5,28 +5,28 @@ #' look-up tables from human readable labels to ids used in the API, or the raw response from the #' meta endpoint. #' -#' @param dataset_id ID of data set to be connected to -#' @param dataset_version Version of data set to be connected to -#' @param api_version EES API version +#' @inheritParams api_url #' #' @return List of data frames containing a data set's meta data #' @export #' #' @examples #' get_meta(example_id()) -get_meta <- function(dataset_id, dataset_version = NULL, api_version = NULL) { +get_meta <- function(dataset_id, dataset_version = NULL, api_version = NULL, + verbose = FALSE) { meta_data_response <- get_meta_response( dataset_id, dataset_version = dataset_version, api_version = api_version, - parse = TRUE + parse = TRUE, + verbose = verbose ) meta_data <- list( - time_periods = parse_meta_time_periods(meta_data_response$timePeriods), - locations = parse_meta_location_ids(meta_data_response$locations), - filter_columns = parse_meta_filter_columns(meta_data_response$filters), - filter_items = parse_meta_filter_item_ids(meta_data_response$filters), - indicators = parse_meta_filter_columns(meta_data_response$indicators) + time_periods = parse_meta_time_periods(meta_data_response$timePeriods, verbose = verbose), + locations = parse_meta_location_ids(meta_data_response$locations, verbose = verbose), + filter_columns = parse_meta_filter_columns(meta_data_response$filters, verbose = verbose), + filter_items = parse_meta_filter_item_ids(meta_data_response$filters, verbose = verbose), + indicators = 
parse_meta_filter_columns(meta_data_response$indicators, verbose = verbose) ) return(meta_data) } @@ -36,9 +36,7 @@ get_meta <- function(dataset_id, dataset_version = NULL, api_version = NULL) { #' @description #' Get the metadata information for a data set available from the EES API. #' -#' @param dataset_id ID of data set to be connected to -#' @param dataset_version Version of data set to be connected to -#' @param api_version EES API version +#' @inheritParams api_url #' @param parse Parse result into structured list #' #' @return Results of query to API meta data endpoint @@ -50,7 +48,8 @@ get_meta_response <- function( dataset_id, dataset_version = NULL, api_version = NULL, - parse = TRUE) { + parse = TRUE, + verbose = FALSE) { # Check that the parse flag is valid if (is.logical(parse) == FALSE) { stop( @@ -81,6 +80,7 @@ get_meta_response <- function( #' Parse API meta to give the time periods #' +#' @inheritParams api_url #' @param api_meta_time_periods Time periods information provided by the API output #' #' @return Data frame containing location item codes matched @@ -89,7 +89,11 @@ get_meta_response <- function( #' @examples #' get_meta_response(example_id())$timePeriods |> #' parse_meta_time_periods() -parse_meta_time_periods <- function(api_meta_time_periods) { +parse_meta_time_periods <- function(api_meta_time_periods, + verbose = FALSE) { + if (!("code" %in% names(api_meta_time_periods))) { + stop("Code column not found in timePeriods data") + } time_periods <- api_meta_time_periods |> dplyr::mutate(code_num = as.numeric(gsub("[a-zA-Z]", "", api_meta_time_periods$code))) time_periods <- time_periods |> @@ -101,6 +105,7 @@ parse_meta_time_periods <- function(api_meta_time_periods) { #' Parse API meta to give the locations #' +#' @inheritParams api_url #' @param api_meta_locations Locations information provided by the API output #' #' @return Data frame containing location item codes matched @@ -109,32 +114,38 @@ parse_meta_time_periods <- 
function(api_meta_time_periods) { #' @examples #' get_meta_response(example_id())$locations |> #' parse_meta_location_ids() -parse_meta_location_ids <- function(api_meta_locations) { - nlevels <- length(api_meta_locations$level) - location_items <- data.frame( - geographic_level = NA, - code = NA, - label = NA, - item_id = NA - ) - location_items <- location_items |> - dplyr::filter(!is.na(location_items$geographic_level)) +parse_meta_location_ids <- function(api_meta_locations, + verbose = FALSE) { + nlevels <- nrow(api_meta_locations$level) for (i in 1:nlevels) { - location_items_i <- as.data.frame( - api_meta_locations$options[i] - ) |> - dplyr::mutate(geographic_level = api_meta_locations$level$label[i]) - location_items <- location_items |> - rbind( - location_items_i |> - dplyr::select("geographic_level", "code", "label", item_id = "id") - ) + location_items_i <- api_meta_locations$options |> + magrittr::extract2(i) |> + dplyr::mutate( + geographic_level = api_meta_locations$level$label[i] + ) |> + dplyr::rename(item_id = "id") + if (verbose) { + message(paste0("Location level #", i)) + } + if (verbose) { + print(location_items_i) + } + if (i == 1) { + location_items <- location_items_i + } else { + location_items <- location_items |> + dplyr::bind_rows(location_items_i) + } + } + if (verbose) { + message("Collated location levels into single data frame.") } return(location_items) } #' Parse API meta to give the filter columns #' +#' @inheritParams api_url #' @param api_meta_filters Filter information provided by the API output #' #' @return data frame containing column names and labels @@ -143,7 +154,8 @@ parse_meta_location_ids <- function(api_meta_locations) { #' @examples #' get_meta_response(example_id())$filters |> #' parse_meta_filter_columns() -parse_meta_filter_columns <- function(api_meta_filters) { +parse_meta_filter_columns <- function(api_meta_filters, + verbose = FALSE) { data.frame( col_id = api_meta_filters$id, col_name = 
api_meta_filters$column, @@ -153,6 +165,7 @@ parse_meta_filter_columns <- function(api_meta_filters) { #' Parse API meta to give the filter item codes #' +#' @inheritParams api_url #' @param api_meta_filters Filter information provided by the API output #' #' @return Data frame containing filter item codes matched to filter item labels and col_name @@ -161,7 +174,8 @@ parse_meta_filter_columns <- function(api_meta_filters) { #' @examples #' get_meta_response(example_id())$filters |> #' parse_meta_filter_item_ids() -parse_meta_filter_item_ids <- function(api_meta_filters) { +parse_meta_filter_item_ids <- function(api_meta_filters, + verbose = FALSE) { nfilters <- length(api_meta_filters$id) filter_items <- data.frame( col_id = NA, diff --git a/R/http_request_error.R b/R/http_request_error.R index 6824f14..0e06ead 100644 --- a/R/http_request_error.R +++ b/R/http_request_error.R @@ -24,7 +24,9 @@ http_request_error <- function( ), response_text = c( "Successful API request.", - "Invalid query, data set ID, data set version or API version submitted to API.", + paste( + "Invalid query, data set ID, data set version or API version submitted to API." + ), paste( "Internal server error encountered - please contact the EES API team at", "explore.statistics@education.gov.uk", @@ -32,17 +34,28 @@ http_request_error <- function( ) ) ) + status_group <- trunc(response$status / 100.) if (status_group %in% status_lookup$response_group) { status_response_text <- status_lookup |> dplyr::filter(status_lookup$response_group == status_group) |> dplyr::pull("response_text") - if (!(status_group %in% c(2, 5)) && !is.null(response$errors)) { - status_response_text <- status_response_text |> - paste0( - "\n", - paste(response$errors, collapse = ". 
") + if (!(status_group %in% c(2, 5))) { + api_error <- response |> + httr::content("text") |> + jsonlite::fromJSON() |> + magrittr::extract2("errors") + if (!is.null(api_error)) { + status_response_text <- paste0( + api_error |> + dplyr::pull("message"), + "\n ", + api_error |> + dplyr::pull("detail") |> + unlist() |> + paste0(collapse = ", ") ) + } } } else { status_response_text <- "API http response code not recognised." @@ -50,7 +63,7 @@ http_request_error <- function( if (status_group != 2) { stop( paste0( - "HTTP connection error: ", + "\nHTTP connection error: ", response$status, "\n", status_response_text diff --git a/R/post_dataset.R b/R/post_dataset.R new file mode 100644 index 0000000..ce9c038 --- /dev/null +++ b/R/post_dataset.R @@ -0,0 +1,159 @@ +#' Query a data set using POST and a query json +#' +#' @description +#' This function provides a method for generating and sending a json based data query to the +#' EES API. As a minimum, it requires the dataset_id flag and either the indicators flag or +#' a json file containing a query to be provided. +#' +#' @inheritParams api_url +#' @inheritParams parse_tojson_params +#' @param json_query Optional path to a json file containing the query parameters +#' @param parse Logical flag to activate parsing of the results. Default: TRUE +#' +#' @return Data frame containing query results of an API data set +#' @export +#' +#' @examples +#' post_dataset( +#' example_id(group = "attendance"), +#' json_query = example_json_query() +#' ) +#' +#' # Run post_dataset() to select rows containing either of two geographic locations and either of +#' # two filter items. 
+#' post_dataset( +#' example_id(group = "attendance"), +#' indicators = example_id("indicator", group = "attendance"), +#' time_periods = "2024|W23", +#' geographies = c("NAT|id|dP0Zw", "REG|id|rg3Nj"), +#' filter_items = c("CvuId", "6AXrf"), +#' page = 1, +#' page_size = 32 +#' ) +#' +#' # Run post_dataset() using set parameters giving a combination of filter options +#' post_dataset( +#' example_id(group = "attendance"), +#' indicators = example_id("indicator", group = "attendance"), +#' time_periods = "2024|W23", +#' geographies = c("NAT|id|dP0Zw", "REG|id|rg3Nj"), +#' filter_items = list( +#' attendance_status = c("pmRSo", "7SdXo"), +#' attendance_type = c("CvuId", "6AXrf", "0k3T5", "YdkHK"), +#' education_phase = c("ThDPJ", "crH31"), +#' day_number = c("uLQo4"), +#' reason = c("bBrtT") +#' ) +#' ) +post_dataset <- function( + dataset_id, + indicators = NULL, + time_periods = NULL, + geographies = NULL, + filter_items = NULL, + json_query = NULL, + dataset_version = NULL, + api_version = NULL, + page = NULL, + page_size = 1000, + parse = TRUE, + debug = FALSE, + verbose = FALSE) { + if (is.null(indicators) && is.null(json_query)) { + stop("At least one of either indicators or json_query must not be NULL.") + } + if (!is.null(json_query)) { + if (any(!is.null(c(indicators, time_periods, geographies, filter_items)))) { + warning( + paste( + "json_query is set - ignoring indicators, time_periods, geographies", + " and filter_items params." 
+ ) + ) + } + if (json_query |> stringr::str_sub(-5) == ".json") { + json_body <- readLines(json_query) |> + paste0(collapse = "\n") + } else { + message("Parsing query options") + json_body <- json_query + } + } else { + json_body <- eesyapi::parse_tojson_params( + indicators = indicators, + time_periods = time_periods, + geographies = geographies, + filter_items = filter_items, + page = page, + page_size = page_size, + debug = debug, + verbose = verbose + ) + } + if (verbose) { + json_body |> cat(fill = TRUE) + } + response <- eesyapi::api_url( + "post-data", + dataset_id = dataset_id, + dataset_version = dataset_version + ) |> httr::POST( + body = json_body, + encode = "json", + httr::content_type("application/json") + ) + if (verbose) { + print(response) + print( + response |> + httr::content("text") |> + jsonlite::fromJSON() + ) + } + eesyapi::http_request_error(response) + # Unless the user specifies a specific page of results to get, loop through all available pages. + response_json <- response |> + httr::content("text") |> + jsonlite::fromJSON() + if (verbose) { + message(paste("Total number of pages: ", response_json$paging$totalPages)) + } + dfresults <- response_json$results |> + eesyapi::parse_api_dataset(verbose = verbose) + # Unless the user has requested a specific page, then assume they'd like all pages collated and + # recursively run the query. 
+ if (is.null(page) && is.null(json_query)) { + if (response_json$paging$totalPages > 1) { + for (page in c(2:response_json$paging$totalPages)) { + json_body <- eesyapi::parse_tojson_params( + indicators = indicators, + time_periods = time_periods, + geographies = geographies, + filter_items = filter_items, + page = page, + page_size = page_size, + verbose = verbose + ) + response_page <- eesyapi::api_url( + "post-data", + dataset_id = dataset_id, + dataset_version = dataset_version + ) |> + httr::POST( + body = json_body, + encode = "json", + httr::content_type("application/json") + ) |> + httr::content("text") |> + jsonlite::fromJSON() + response_page |> eesyapi::warning_max_pages() + dfresults <- dfresults |> + dplyr::bind_rows( + response_page$results |> + eesyapi::parse_api_dataset(verbose = verbose) + ) + } + } + } + return(dfresults) +} diff --git a/R/post_dataset_utils.R b/R/post_dataset_utils.R new file mode 100644 index 0000000..063ea74 --- /dev/null +++ b/R/post_dataset_utils.R @@ -0,0 +1,370 @@ +#' Create a json query +#' +#' @description +#' Creates a json query for use when POST-ing a query to the API. This takes time period, +#' geography, filter item and indicator criteria and produces a working json query as a +#' single string. The result can be used directly by post_dataset() or the output of +#' `parse_tojson_params(...) |> cat()` can be copied and pasted as the "body" content in +#' other API connection software (such as Postman) to POST a query to the EES API. 
+#' +#' @inheritParams api_url +#' @inheritParams parse_tojson_geographies +#' @param debug Run POST query in debug mode: logic, default: FALSE +#' +#' @return String containing json query body for use with http POST request +#' @export +#' +#' @examples +#' parse_tojson_params(example_id("indicator")) |> +#' cat() +#' +#' parse_tojson_params( +#' example_id("indicator"), +#' time_periods = "2024|W23", +#' geographies = c("NAT|id|dP0Zw", "REG|id|rg3Nj"), +#' filter_items = c("pmRSo", "7SdXo") +#' ) |> +#' cat() +#' +#' # Create a geographies data frame to find both of: +#' # - England national level data +#' # - all LAs in a specified region ("E12000004") +#' dfgeographies <- data.frame( +#' return_level = c("NAT", "LA"), +#' search_level = c("NAT", "REG"), +#' identifier_type = c("code", "code"), +#' identifier = c("E92000001", "E12000004") +#' ) +#' +#' parse_tojson_params( +#' example_id("indicator"), +#' time_periods = "2024|W23", +#' geographies = dfgeographies, +#' filter_items = c("pmRSo") +#' ) |> +#' cat() +#' +#' # Create a filter list to find the combination of: +#' # - day_number is in c("uLQo4", "qf0jG", "aMjLP") *and* +#' # - reason is in c("bBrtT", "ThjPJ", "hsHyW", "m2m9K") *and* +#' # - education_phase is in c("5UNdi", "crH31") +#' filter_list <- list( +#' day_number = c("uLQo4", "qf0jG", "aMjLP"), +#' reason = c("bBrtT", "ThjPJ", "hsHyW", "m2m9K"), +#' education_phase = c("5UNdi", "crH31") +#' ) +#' +#' parse_tojson_params( +#' example_id("indicator"), +#' time_periods = "2024|W23", +#' geographies = "NAT|code|E92000001", +#' filter_items = filter_list +#' ) |> +#' cat() +#' +parse_tojson_params <- function( + indicators, + time_periods = NULL, + geographies = NULL, + filter_items = NULL, + page = 1, + page_size = 1000, + debug = FALSE, + verbose = FALSE) { + # Set some default strings + bridge <- "\n ]\n}," + debug_str <- paste(",\n\"debug\":", debug) |> tolower() + pages_str <- paste0( + ",\n\"page\": ", + ifelse(is.null(page), 1, page), + 
",\n\"pageSize\": ", + page_size, + "\n}" + ) + + json_query <- paste0( + "{\n", + ifelse( + any(!is.null(c(time_periods, geographies, filter_items))), + paste0( + "\"criteria\": {\n \"and\": [\n", + paste( + eesyapi::parse_tojson_time_periods(time_periods), + eesyapi::parse_tojson_geographies(geographies), + eesyapi::parse_tojson_filter(filter_items, filter_type = "filter_items"), + sep = ",\n" + ) |> + stringr::str_replace_all(",\\n,\\n,\\n|,\\n,\\n", ",\\\n") |> + stringr::str_remove_all("^,\\n|,\\n$"), + bridge + ), + "" + ), + parse_tojson_indicators(indicators), + debug_str, + pages_str + ) + if (verbose) { + json_query |> cat() + } + return(json_query) +} + +#' Parse time_periods to json +#' +#' @description +#' Create a json query sub-string based on time periods constraints +#' +#' @inheritParams api_url +#' +#' @return String containing json form query for time periods +#' @export +#' +#' @examples +#' parse_tojson_time_periods(c("2023|W25", "2024|W12")) +parse_tojson_time_periods <- function(time_periods) { + eesyapi::validate_time_periods(time_periods) + if (!is.null(time_periods)) { + df_time_periods <- time_periods |> + stringr::str_split("\\|", simplify = TRUE) |> + as.data.frame() |> + dplyr::rename(period = "V1", code = "V2") + paste0( + " {\n \"timePeriods\": {\n \"in\": [\n", + paste0( + " {\n \"period\": \"", + df_time_periods$period, + "\",\n \"code\": \"", + df_time_periods$code, + "\"\n }", + collapse = ",\n" + ), + "\n ]\n }\n }" + ) + } else { + NULL + } +} + +#' Parse a combination-filter query to json +#' +#' @description +#' Create a json query sub-string based on a combination \"in\" and \"and\" constraints +#' +#' @inheritParams parse_tourl_filter_in +#' @return String containing json form query with \"and\"-combination of different filter +#' selections +#' @export +#' +#' @examples +#' parse_tojson_filter( +#' list( +#' day_number = c("uLQo4", "qf0jG", "aMjLP"), +#' reason = c("bBrtT", "ThjPJ", "hsHyW", "m2m9K"), +#' education_phase 
= c("5UNdi", "crH31") +#' ) +#' ) |> +#' cat() +parse_tojson_filter <- function(items, filter_type = "filter_items") { + eesyapi::validate_ees_filter_type(filter_type) + if (is.list(items)) { + # If items is a list, then process it as a combination separate "in" queries + paste0( + "{\n\"and\": [\n", + sapply(items, parse_tojson_filter_in, filter_type) |> + paste(collapse = ",\n"), "\n]\n}" + ) + } else if (is.vector(items)) { + # If items is a vector, then revert to just a single "in" query + parse_tojson_filter_in(items) + } else { + NULL + } +} + +#' Parse a filter-in type query to json +#' +#' @description +#' Create a json query sub-string based on filter-in constraints +#' +#' @inheritParams parse_tourl_filter_in +#' +#' @return String containing json form query based on filter-in constraints +#' @export +#' +#' @examples +#' parse_tojson_filter_in(c("NAT", "REG"), filter_type = "geographic_levels") +parse_tojson_filter_in <- function(items, filter_type = "filter_items") { + eesyapi::validate_ees_filter_type(filter_type) + if (!is.null(items)) { + api_filter_type <- eesyapi::convert_api_filter_type(filter_type) + paste0( + " {\n \"", + api_filter_type, + "\": {\n \"in\": [\n \"", + paste0(items, collapse = "\",\n \""), + "\"\n ]\n }\n }" + ) + } else { + NULL + } +} + +#' Parse a filter-equal type query to json +#' +#' @description +#' Create a json query sub-string based on filter-equal constraints +#' +#' @inheritParams parse_tourl_filter_in +#' +#' @return String containing json form query based on filter-equal-to constraints +#' @export +#' +#' @examples +#' parse_tojson_filter_eq("NAT", filter_type = "geographic_levels") |> cat() +parse_tojson_filter_eq <- function(items, filter_type = "filter_items") { + eesyapi::validate_ees_filter_type(filter_type) + if (!is.null(items)) { + api_filter_type <- eesyapi::convert_api_filter_type(filter_type) + paste0( + " {\n \"", + api_filter_type, + "\": {\n \"eq\": \"", + items, + "\"\n }\n }" + ) + } else { + NULL + 
} +} + +#' Parse geographies to json +#' +#' @description +#' Create a json query sub-string based on location constraints +#' +#' @param geographies String, vector or data frame containing the geographic levels and +#' locations to be queried. +#' +#' @return String containing json form query for geographies +#' @export +#' +#' @examples +#' parse_tojson_geographies(c("NAT", "REG")) |> +#' cat() +#' parse_tojson_geographies(c("NAT|id|dP0Zw", "REG|id|rg3Nj")) |> +#' cat() +#' parse_tojson_geographies(c("NAT|id|dP0Zw", "REG")) |> +#' cat() +#' parse_tojson_geographies(c("NAT|id|dP0Zw", "REG")) |> +#' cat() +parse_tojson_geographies <- function(geographies) { + if (is.null(geographies)) { + return(NULL) + } else if (is.vector(geographies) || is.character(geographies)) { + geographies <- geographies |> + stringr::str_split("\\|", simplify = TRUE) |> + as.data.frame() + if (ncol(geographies) == 1) { + geographies <- geographies |> + dplyr::mutate( + V2 = "", + V3 = "" + ) + } + geographies <- geographies |> + dplyr::rename(search_level = "V1", identifier_type = "V2", identifier = "V3") |> + dplyr::mutate(return_level = !!rlang::sym("search_level")) + } else if (is.data.frame(geographies)) { + if ( + !all( + c( + "return_level", + "search_level", + "identifier_type", + "identifier" + ) %in% + colnames(geographies) + ) + ) { + stop("The column \"search_level\" is required in the geographies data frame.") + } + } else { + stop("The geographies parameter should be given as either a data frame, vector or string.") + } + paste0( + " {\n \"or\": [\n", + paste0( + " {\n \"and\": [\n", + parse_tojson_filter_eq( + geographies |> + dplyr::pull("return_level"), + filter_type = "geographic_levels" + ), + parse_tojson_location(geographies, include_comma = TRUE), + "\n ]\n }", + collapse = ",\n" + ), + "\n ]\n }" + ) +} + +#' Create json location search string from geographies +#' +#' @param geographies Vector or data frame of search geographies +#' @param include_comma Include a 
comma before return strings (logical) +#' +#' @return Vector of strings containing json location search string +#' @export +#' +#' @examples +#' parse_tojson_location(example_geography_query()) |> cat() +parse_tojson_location <- function(geographies, include_comma = FALSE) { + comma_string <- ifelse(include_comma, ",", "") + location_json <- geographies |> + dplyr::mutate( + location_json = dplyr::if_else( + !!rlang::sym("identifier_type") != "", + paste0( + comma_string, + "\n {\n \"locations\": {\n \"in\": [\n", + " {\n \"level\": \"", + !!rlang::sym("search_level"), + "\",\n \"", + !!rlang::sym("identifier_type"), + "\": \"", + !!rlang::sym("identifier"), + "\"\n }\n ]\n }\n }" + ), + "" + ) + ) |> + dplyr::pull("location_json") + return(location_json) +} + + +#' Parse an indicator-in type query to json +#' +#' @description +#' Create a json query sub-string based on indicator-in constraints +#' +#' @param indicators String or vector of strings containing indicator ids +#' +#' @return A json query string to select a set of indicators +#' @export +#' +#' @examples +#' parse_tojson_indicators(example_id("indicator")) |> +#' cat() +parse_tojson_indicators <- function(indicators) { + eesyapi::validate_ees_id(indicators, level = "indicator") + paste0( + "\n\"indicators\": [\n \"", + paste0( + indicators, + collapse = "\",\n \"" + ), + "\"\n]" + ) +} diff --git a/R/query_dataset.R b/R/query_dataset.R index c3172fa..128807a 100644 --- a/R/query_dataset.R +++ b/R/query_dataset.R @@ -1,45 +1,191 @@ #' Query a data set #' #' @description -#' Create and send a query to the EES API. Queries can be constructed by including the -#' codes to the relevant flags to filter on time period, geographic level, location, -#' and data set specific filters. If none of the above are set in the function call, -#' then the entire data set will be retrieved. The data set id and specific indicators -#' of interest must be supplied explictly using the dataset_id and indicators params. 
+#' Create and send a query to the EES API. Queries can be supplied and run in one of 4 ways: +#' - Supplying a json query in a file to be sent with the POST method. +#' - Supplying a json query in a string variable to be sent with the POST method. +#' - Supplying parameters (time_periods, geographies, filter_items, indicators) to build a json +#' query is then sent with the POST method. +#' - Supplying parameters (time_periods, geographies, filter_items, indicators) to build a json +#' query is then sent with the GET method. +#' +#' In all cases, the data set id must be supplied explicitly using the dataset_id. +#' +#' Details on the format of each parameter for the latter two methods are as follows. +#' +#' ## indicators +#' +#' This must be supplied as a vector of sqids, which can be identified from the meta data using +#' `get_meta()`. +#' +#' ## time_periods +#' +#' Time periods should be supplied as a vector of periods and codes in the form: +#' - `"period|code"` +#' +#' For example, selecting the 2023 and 2024 academic years would require: +#' `time_period = c("2023|AY", "2024|AY")` +#' +#' ## geographies +#' +#' Geographies can be supplied as a vector or a data frame depending on the complexity of the +#' desired query. +#' +#' A vector will run a query returning **any** rows meeting any of the given +#' geographies, i.e. geographies = `c("NAT", "REG")` will return all national and regional level +#' rows, whilst `c("NAT", "REG|code|E120000001")` will return all national level rows and all North +#' East rows. Specific locations are required to be supplied in the format +#' `"LEVEL|identifier_type|identifier"`, where identifier type can be either code or id and the +#' corresponding identifier is then the standard ONS code or the sqid given in the meta data +#' respectively. 
Using England as an example, these would be: +#' - `"NAT|code|E92000001"` +#' - `"NAT|id|dP0Zw"` +#' +#' If you require a more complex selection, for example all LAs in a given region, then a data +#' frame should be supplied, with a row for each selection. Note however, that this will only work +#' when using the default `POST` method. The `GET` method is much more limited and can not process +#' more complex queries. +#' +#' The geography query data frame should contain the following columns: +#' - return_level: the geographic level to return (e.g. LA in the example above). +#' - search_level: the geographic level of the search location (e.g. REG in the example above). +#' - identifier_type: "code" or "id". +#' - identifier: the code or id (sqid) for the search location (e.g. the code or sqid of the +#' region in the above example). +#' +#' Further rows can be added to add other geography searches to include in results. +#' +#' An example of a working geographies data frame can be obtained using `example_geography_query()`. +#' +#' ## filter_items +#' +#' Similarly to geographies, criteria for querying on filter items can be provided either via a +#' vector for simple queries or, for more complex queries, as a list. In both cases, vector or +#' list, filter items can only be supplied as sqids, i.e. the ids found in the meta data using +#' `get_meta(dataset_id)`. +#' +#' Providing a vector of sqids will effectively run a query returning any rows containing any of +#' the listed sqids. This therefore does not allow narrow searches based on a row or set of rows +#' matching multiple criteria. +#' +#' Providing a list structure can provide a more narrow query selecting individual rows based on +#' a combination of criteria. 
For example if we want rows that contain sqid1 in one column and +#' sqid2 in another, then we would pass a list of: +#' `filter_query <- list(column1 = c("sqid1"), column2 = c("sqid2"))` +#' Note that the naming of the entries in the list is not necessary, but may help in creating more +#' readable code. +#' +#' If we wish to create a query whereby we receive rows containing sqid1 in column 1 and sqid2 or +#' sqid3 in column 2, then the required list would be: +#' `filter_query <- list(column1 = c("sqid1"), column2 = c("sqid2", "sqid3"))` +#' +#' In this way, we can build up combinations of OR and AND criteria for a query across multiple +#' filter columns. +#' +#' *Note again that the more complex querying using a list variable will only function when using +#' the `POST` method*. +#' +#' ## Controlling paging +#' +#' You can request a specific set of rows using the page and page_size parameters. Keeping the +#' default of page = NULL will return all rows matching the query. Setting page and page_size to +#' numerical values will attempt to return a subset of rows, with page_size defining the number of +#' rows and page defining which subset of rows to return from the query (i.e. page = 1, page_size = +#' 20 will return the first 20 rows, page = 2 and page_size = 20 will return the second 20 rows +#' and so on). #' #' @inheritParams api_url -#' @param method An API query method. Needs to be "GET" +#' @inheritParams post_dataset +#' @param method The API query method to be used. Can be "POST" or "GET". Default: "POST". #' #' @return Data frame containing query results #' @export #' #' @examples +#' # Run query_dataset() using a json query string input to json_query (this can also be done by +#' # passing a filename of a file containing your json query string). 
#' query_dataset( -#' example_id(), -#' geographic_levels = "NAT", -#' filter_items = example_id("filter_item"), -#' indicators = example_id("indicator") +#' example_id(group = "attendance"), +#' json_query = example_json_query() #' ) +#' +#' # If you don't want to have to write your own json query, the rest of the examples illustrate +#' # how to use query_dataset() with parameters to construct queries in R. +#' +#' # Run query_dataset() to select rows containing either of two geographic locations and either of +#' # two filter items. +#' query_dataset( +#' example_id(group = "attendance"), +#' indicators = example_id("indicator", group = "attendance"), +#' time_periods = "2024|W23", +#' geographies = c("NAT|id|dP0Zw", "REG|id|rg3Nj"), +#' filter_items = c("CvuId", "6AXrf"), +#' page = 1, +#' page_size = 32 +#' ) +#' +#' # Run query_dataset() using set parameters giving a combination of filter options +#' query_dataset( +#' example_id(group = "attendance"), +#' indicators = example_id("indicator", group = "attendance"), +#' time_periods = "2024|W23", +#' geographies = c("NAT"), +#' filter_items = list( +#' attendance_status = c("pmRSo", "7SdXo"), +#' attendance_type = c("CvuId", "6AXrf", "0k3T5", "YdkHK"), +#' education_phase = c("ThDPJ", "crH31"), +#' day_number = c("uLQo4"), +#' reason = c("bBrtT") +#' ) +#' ) +#' +#' # Run a query with a more complex geography selection. 
Return data for all of: +#' # - England +#' # - Yorkshire and the Humber +#' # - All LAs in Yorkshire and the Humber +#' example_geography_query("nat_yorks_yorkslas") +#' query_dataset( +#' example_id(group = "attendance"), +#' indicators = example_id("indicator", group = "attendance"), +#' time_periods = "2024|W23", +#' geographies = example_geography_query("nat_yorks_yorkslas"), +#' filter_items = list( +#' attendance_status = c("pmRSo"), +#' attendance_type = c("CvuId"), +#' education_phase = c("ThDPJ"), +#' day_number = c("uLQo4"), +#' reason = c("bBrtT") +#' ) +#' ) +#' +#' # Run a basic query using GET instead of POST #' query_dataset( #' example_id(), +#' method = "GET", +#' geographic_levels = c("SCH"), +#' filter_items = example_id("filter_item"), #' indicators = example_id("indicator"), #' page = 1, #' page_size = 10 #' ) +#' query_dataset <- function( dataset_id, - indicators, + indicators = NULL, time_periods = NULL, + geographies = NULL, geographic_levels = NULL, locations = NULL, filter_items = NULL, - method = "GET", + json_query = NULL, + method = "POST", dataset_version = NULL, api_version = NULL, page_size = 1000, page = NULL, + debug = FALSE, verbose = FALSE) { - if (method != "GET") { + if (!(method %in% c("POST", "GET"))) { stop( paste( "Invalid method selected. 
The keyword method should be set to GET", @@ -47,7 +193,22 @@ query_dataset <- function( ) ) } - if (method == "GET") { + if (method == "POST") { + eesyapi::post_dataset( + dataset_id = dataset_id, + indicators = indicators, + time_periods = time_periods, + geographies = geographies, + filter_items = filter_items, + json_query = json_query, + dataset_version = dataset_version, + api_version = api_version, + page_size = page_size, + page = page, + debug = debug, + verbose = verbose + ) + } else { warning( paste( "Using GET to query a data set offers limited functionality, we recommend", diff --git a/R/validation_rules.R b/R/validation_rules.R index a679934..ada3e5d 100644 --- a/R/validation_rules.R +++ b/R/validation_rules.R @@ -24,17 +24,42 @@ validate_page_size <- function(page_size, min = 1, max = 40) { } } +#' Validate time periods +#' +#' @inheritParams api_url_query +#' +#' @return NULL +#' @export +#' +#' @examples +#' validate_time_periods(c("2023|AY", "2024|AY")) +validate_time_periods <- function(time_periods) { + time_pipes <- time_periods |> + stringr::str_replace_all("[a-zA-Z0-9]", "") + if (!all(stringr::str_length(time_pipes) == 1)) { + invalid_pipes <- time_periods[stringr::str_length(time_pipes) != 1] + stop( + paste( + "Invalid time periods provided:", + paste(invalid_pipes, collapse = ", "), + "\nThese should be in the format {period}|{code}, e.g. 
2024|AY, 2023|W21" + ) + ) + } +} + #' Validate element IDs #' #' @param element_id ID for publication or a data set #' @param level ID level: "publication", "dataset", "location", "filter_item" or "indicator" +#' @param verbose Run in verbose mode #' #' @return NULL #' @export #' #' @examples #' validate_ees_id(example_id("publication"), level = "publication") -validate_ees_id <- function(element_id, level = "publication") { +validate_ees_id <- function(element_id, level = "publication", verbose = FALSE) { if (!(level %in% c("publication", "dataset", "location", "filter_item", "indicator"))) { stop( paste0( @@ -43,59 +68,111 @@ validate_ees_id <- function(element_id, level = "publication") { ) ) } - skip_tests <- FALSE if (is.null(element_id)) { stop( "The variable ", level, "_id is NULL, please provide a valid ", level, "_id." ) - } else { - if (level == "location") { - locations <- element_id |> - stringr::str_split("\\|") - if (any(locations |> sapply(length) < 3)) { - stop('Invalid location IDs found, these should be of the form "XXX|xxxx|1b3d5".') - } else { - # Extract the individual 5 digit location IDs - locations <- locations |> - as.data.frame() |> - t() |> - as.data.frame() |> - dplyr::rename(level = "V1", type = "V2", value = "V3") - element_id_proc <- locations$value - if (all(locations$type == "id")) { - example_vector <- eesyapi::example_id("location") |> - stringr::str_split("\\|") - example_id_string <- example_vector[[1]][3] - } else { - skip_tests <- TRUE - } - } + } + if (level == "location") { + locations <- element_id |> + stringr::str_split("\\|", simplify = TRUE) + if ("" %in% locations || ncol(locations) != 3) { + stop('Invalid locations found, these should be of the form "LEVEL|xxxx|1b3d5".') } else { - element_id_proc <- element_id - example_id_string <- eesyapi::example_id(level) - } - if (!skip_tests) { - if (any(stringr::str_length(element_id) != stringr::str_length(eesyapi::example_id(level)))) { - err_string <- paste0( - "The ", 
level, - "_id(s) provided (", paste0(element_id_proc, collapse = ", "), - ") is expected to be a ", - stringr::str_length(example_id_string), - " character string in the format:\n ", - example_id_string, - "\n Please double check your ", level, - "_id." - ) - stop(err_string) - } else if ( - any( - gsub("[0-9a-zA-Z]", "", element_id_proc) != gsub("[0-9a-zA-Z]", "", example_id_string) - ) - ) { - stop(err_string) + # Extract the individual 5 digit location IDs + df_locations <- locations |> + as.data.frame() |> + dplyr::rename(level = "V1", identifier_type = "V2", identifier = "V3") + location_type <- df_locations |> + dplyr::pull("identifier_type") |> + unique() + if (any(!(location_type %in% c("id", "code")))) { + stop("The middle entry in \"LEVEL|xxxx|1b3d5\" should be one of \"id\" or \"code\"") } + level <- paste(level, location_type, sep = "_") + element_id <- df_locations } + } else { + element_id <- data.frame(identifier = element_id) |> + dplyr::mutate(identifier_type = "id") + } + example_id_string <- eesyapi::example_id(level, group = "attendance") + if (any(grepl("location", level))) { + example_id_string <- example_id_string |> + stringr::str_split("\\|", simplify = TRUE) |> + as.data.frame() |> + dplyr::rename( + identifier_type = "V2", + identifier = "V3" + ) + } else { + example_id_string <- data.frame(identifier = example_id_string) |> + dplyr::mutate(identifier_type = "id") + } + check_frame <- element_id |> + dplyr::left_join(example_id_string, by = "identifier_type") + error_rows <- check_frame |> + dplyr::filter( + !!rlang::sym("identifier_type") == "id", + stringr::str_length(!!rlang::sym("identifier.x")) < + stringr::str_length(!!rlang::sym("identifier.y")) + ) |> + dplyr::bind_rows( + check_frame |> + dplyr::filter( + !!rlang::sym("identifier_type") == "code", + stringr::str_length(!!rlang::sym("identifier.x")) != + stringr::str_length(!!rlang::sym("identifier.y")) + ) + ) + if (nrow(error_rows) != 0) { + err_string <- paste0( + "The ", 
paste(level, collapse = ","), + "(s) provided (", + paste0(error_rows |> dplyr::pull("identifier.x"), collapse = ", "), + ") is expected to be a ", + paste0(error_rows |> dplyr::pull("identifier.y") |> stringr::str_length(), collapse = ", "), + " character string in the format:\n ", + paste0(error_rows |> dplyr::pull("identifier.y"), collapse = ", "), + "\n Please double check your ", paste(level, collapse = ","), + "." + ) + stop(err_string) + } else if ( + any( + gsub("[0-9a-zA-Z]", "", element_id |> dplyr::pull("identifier")) != + gsub("[0-9a-zA-Z]", "", example_id_string |> dplyr::pull("identifier")) + ) + ) { + stop( + paste( + "Some elements in", + paste(element_id |> dplyr::pull("identifier"), collapse = ", "), + "do not match the expected structure: ", + example_id_string |> dplyr::pull("identifier") + ) + ) + } +} + +#' Validate filter type +#' +#' @param filter_type type of filter being queried: "time_periods", "geographic_levels", +#' +#' @return NULL +#' @export +#' +#' @examples +#' validate_ees_filter_type("time_periods") +validate_ees_filter_type <- function(filter_type) { + if (!(filter_type %in% c("time_periods", "geographic_levels", "locations", "filter_items"))) { + stop( + paste( + "filter_type keyword should be one of \"time_periods\", \"geographic_levels\",", + "\"locations\" or \"filter_items\"" + ) + ) } } diff --git a/_pkgdown.yml b/_pkgdown.yml index a6054b9..291b7f4 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -17,10 +17,11 @@ reference: - title: Support for generating API URLs and interpreting responses desc: These functions are helpful for deriving urls and handling HTTP responses and are used widely by the API workflow functions. 
contents: - - example_id - starts_with("api_url") - http_request_error - - parse_filter_in + - convert_api_filter_type + - parse_tourl_filter_in + - starts_with("parse_tojson") - title: Support for handling meta data from the API desc: These functions are used by `get_meta()` and included here for completeness. @@ -32,8 +33,14 @@ reference: desc: These functions are used by `get_meta()` and included here for completeness. contents: - get_dataset + - post_dataset - parse_api_dataset +- title: Examples + desc: Functions to create useful example cases for tests and code examples + contents: + - starts_with("example_") + - title: Validation functions desc: These functions are used across the package to validate elements being passed as part of an API url or query. contents: diff --git a/man/api_url.Rd b/man/api_url.Rd index d773cce..00eb32d 100644 --- a/man/api_url.Rd +++ b/man/api_url.Rd @@ -51,7 +51,7 @@ of "get-summary", "get-meta", "get-data" or "post-data"} \item{environment}{EES environment to connect to: "dev", "test", "preprod" or "prod"} -\item{verbose}{Add extra contextual information whilst running} +\item{verbose}{Run with additional contextual messaging, logical, default = FALSE} } \value{ A string containing the URL for connecting to the EES API diff --git a/man/convert_api_filter_type.Rd b/man/convert_api_filter_type.Rd new file mode 100644 index 0000000..b4d1d6a --- /dev/null +++ b/man/convert_api_filter_type.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convert_api_filter_type.R +\name{convert_api_filter_type} +\alias{convert_api_filter_type} +\title{Convert filter type to API filter type} +\usage{ +convert_api_filter_type(filter_type) +} +\arguments{ +\item{filter_type}{type of filter being queried: "time_periods", "geographic_levels", +"locations" or "filter_items"} +} +\value{ +String containing API friendly filter type descriptor +} +\description{ +The API uses a slightly different naming 
convention for the different types of +filters to what is used by analysts within data files. The function just converts +from the file versions to the API versions. +} +\examples{ +convert_api_filter_type("filter_items") +convert_api_filter_type("geographic_levels") +convert_api_filter_type("locations") +convert_api_filter_type("filter_items") +} diff --git a/man/example_geography_query.Rd b/man/example_geography_query.Rd new file mode 100644 index 0000000..a3c503e --- /dev/null +++ b/man/example_geography_query.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/examples.R +\name{example_geography_query} +\alias{example_geography_query} +\title{Create an example geography-query data frame} +\usage{ +example_geography_query(level = "nat_yorks") +} +\arguments{ +\item{level}{Query level within available options, can be one of \"nat_yorks\" or +\"nat_yorks_yorkslas\"} +} +\value{ +Data frame containing an example geography query +} +\description{ +Create an example geography-query data frame +} +\examples{ +example_geography_query() +} diff --git a/man/example_id.Rd b/man/example_id.Rd index 1ea8e45..08d2f82 100644 --- a/man/example_id.Rd +++ b/man/example_id.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/example_id.R +% Please edit documentation in R/examples.R \name{example_id} \alias{example_id} \title{Example ID} diff --git a/man/example_json_query.Rd b/man/example_json_query.Rd new file mode 100644 index 0000000..0d6d812 --- /dev/null +++ b/man/example_json_query.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/examples.R +\name{example_json_query} +\alias{example_json_query} +\title{Create an example json query string} +\usage{ +example_json_query() +} +\value{ +String containing an example json query +} +\description{ +Create an example json query string for use in examples and tests +} +\examples{ 
+example_json_query() |> cat() +} diff --git a/man/get_dataset.Rd b/man/get_dataset.Rd index 1933c75..ae80656 100644 --- a/man/get_dataset.Rd +++ b/man/get_dataset.Rd @@ -43,7 +43,7 @@ of "get-summary", "get-meta", "get-data" or "post-data"} \item{parse}{Logical flag to activate parsing of the results. Default: TRUE} -\item{verbose}{Add extra contextual information whilst running} +\item{verbose}{Run with additional contextual messaging, logical, default = FALSE} } \value{ Data frame containing query results of an API data set @@ -60,7 +60,7 @@ any of those items. \examples{ get_dataset( example_id(), - geographic_levels = "NAT", + geographic_levels = c("SCH"), filter_items = example_id("filter_item"), indicators = example_id("indicator") ) diff --git a/man/get_meta.Rd b/man/get_meta.Rd index 03e58a1..467ad6b 100644 --- a/man/get_meta.Rd +++ b/man/get_meta.Rd @@ -4,14 +4,22 @@ \alias{get_meta} \title{Get a parsed version of the API response for a data set's meta data} \usage{ -get_meta(dataset_id, dataset_version = NULL, api_version = NULL) +get_meta( + dataset_id, + dataset_version = NULL, + api_version = NULL, + verbose = FALSE +) } \arguments{ -\item{dataset_id}{ID of data set to be connected to} +\item{dataset_id}{ID of data set to be connected to. 
This is required if the endpoint is one +of "get-summary", "get-meta", "get-data" or "post-data"} \item{dataset_version}{Version of data set to be connected to} \item{api_version}{EES API version} + +\item{verbose}{Run with additional contextual messaging, logical, default = FALSE} } \value{ List of data frames containing a data set's meta data diff --git a/man/get_meta_response.Rd b/man/get_meta_response.Rd index 84cbbf0..dfc5176 100644 --- a/man/get_meta_response.Rd +++ b/man/get_meta_response.Rd @@ -8,17 +8,21 @@ get_meta_response( dataset_id, dataset_version = NULL, api_version = NULL, - parse = TRUE + parse = TRUE, + verbose = FALSE ) } \arguments{ -\item{dataset_id}{ID of data set to be connected to} +\item{dataset_id}{ID of data set to be connected to. This is required if the endpoint is one +of "get-summary", "get-meta", "get-data" or "post-data"} \item{dataset_version}{Version of data set to be connected to} \item{api_version}{EES API version} \item{parse}{Parse result into structured list} + +\item{verbose}{Run with additional contextual messaging, logical, default = FALSE} } \value{ Results of query to API meta data endpoint diff --git a/man/parse_meta_filter_columns.Rd b/man/parse_meta_filter_columns.Rd index abc747a..85d82fa 100644 --- a/man/parse_meta_filter_columns.Rd +++ b/man/parse_meta_filter_columns.Rd @@ -4,10 +4,12 @@ \alias{parse_meta_filter_columns} \title{Parse API meta to give the filter columns} \usage{ -parse_meta_filter_columns(api_meta_filters) +parse_meta_filter_columns(api_meta_filters, verbose = FALSE) } \arguments{ \item{api_meta_filters}{Filter information provided by the API output} + +\item{verbose}{Run with additional contextual messaging, logical, default = FALSE} } \value{ data frame containing column names and labels diff --git a/man/parse_meta_filter_item_ids.Rd b/man/parse_meta_filter_item_ids.Rd index 0d5b22f..2159672 100644 --- a/man/parse_meta_filter_item_ids.Rd +++ b/man/parse_meta_filter_item_ids.Rd @@ -4,10 +4,12 @@ 
\alias{parse_meta_filter_item_ids} \title{Parse API meta to give the filter item codes} \usage{ -parse_meta_filter_item_ids(api_meta_filters) +parse_meta_filter_item_ids(api_meta_filters, verbose = FALSE) } \arguments{ \item{api_meta_filters}{Filter information provided by the API output} + +\item{verbose}{Run with additional contextual messaging, logical, default = FALSE} } \value{ Data frame containing filter item codes matched to filter item labels and col_name diff --git a/man/parse_meta_location_ids.Rd b/man/parse_meta_location_ids.Rd index 1a7d35c..d581cac 100644 --- a/man/parse_meta_location_ids.Rd +++ b/man/parse_meta_location_ids.Rd @@ -4,10 +4,12 @@ \alias{parse_meta_location_ids} \title{Parse API meta to give the locations} \usage{ -parse_meta_location_ids(api_meta_locations) +parse_meta_location_ids(api_meta_locations, verbose = FALSE) } \arguments{ \item{api_meta_locations}{Locations information provided by the API output} + +\item{verbose}{Run with additional contextual messaging, logical, default = FALSE} } \value{ Data frame containing location item codes matched diff --git a/man/parse_meta_time_periods.Rd b/man/parse_meta_time_periods.Rd index 671870e..c576c52 100644 --- a/man/parse_meta_time_periods.Rd +++ b/man/parse_meta_time_periods.Rd @@ -4,10 +4,12 @@ \alias{parse_meta_time_periods} \title{Parse API meta to give the time periods} \usage{ -parse_meta_time_periods(api_meta_time_periods) +parse_meta_time_periods(api_meta_time_periods, verbose = FALSE) } \arguments{ \item{api_meta_time_periods}{Time periods information provided by the API output} + +\item{verbose}{Run with additional contextual messaging, logical, default = FALSE} } \value{ Data frame containing location item codes matched diff --git a/man/parse_tojson_filter.Rd b/man/parse_tojson_filter.Rd new file mode 100644 index 0000000..c1011fa --- /dev/null +++ b/man/parse_tojson_filter.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in 
R/post_dataset_utils.R +\name{parse_tojson_filter} +\alias{parse_tojson_filter} +\title{Parse a combination-filter query to json} +\usage{ +parse_tojson_filter(items, filter_type = "filter_items") +} +\arguments{ +\item{items}{items to be included in the "in" statement} + +\item{filter_type}{type of filter being queried: "time_periods", "geographic_levels", +"locations" or "filter_items"} +} +\value{ +String containing json form query with \"and\"-combination of different filter +selections +} +\description{ +Create a json query sub-string based on a combination \"in\" and \"and\" constraints +} +\examples{ +parse_tojson_filter( + list( + day_number = c("uLQo4", "qf0jG", "aMjLP"), + reason = c("bBrtT", "ThjPJ", "hsHyW", "m2m9K"), + education_phase = c("5UNdi", "crH31") + ) +) |> + cat() +} diff --git a/man/parse_tojson_filter_eq.Rd b/man/parse_tojson_filter_eq.Rd new file mode 100644 index 0000000..95cd030 --- /dev/null +++ b/man/parse_tojson_filter_eq.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/post_dataset_utils.R +\name{parse_tojson_filter_eq} +\alias{parse_tojson_filter_eq} +\title{Parse a filter-equal type query to json} +\usage{ +parse_tojson_filter_eq(items, filter_type = "filter_items") +} +\arguments{ +\item{items}{items to be included in the "in" statement} + +\item{filter_type}{type of filter being queried: "time_periods", "geographic_levels", +"locations" or "filter_items"} +} +\value{ +String containing json form query based on filter-equal-to constraints +} +\description{ +Create a json query sub-string based on filter-equal constraints +} +\examples{ +parse_tojson_filter_eq("NAT", filter_type = "geographic_levels") |> cat() +} diff --git a/man/parse_tojson_filter_in.Rd b/man/parse_tojson_filter_in.Rd new file mode 100644 index 0000000..6459bfb --- /dev/null +++ b/man/parse_tojson_filter_in.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in 
R/post_dataset_utils.R +\name{parse_tojson_filter_in} +\alias{parse_tojson_filter_in} +\title{Parse a filter-in type query to json} +\usage{ +parse_tojson_filter_in(items, filter_type = "filter_items") +} +\arguments{ +\item{items}{items to be included in the "in" statement} + +\item{filter_type}{type of filter being queried: "time_periods", "geographic_levels", +"locations" or "filter_items"} +} +\value{ +String containing json form query based on filter-in constraints +} +\description{ +Create a json query sub-string based on filter-in constraints +} +\examples{ +parse_tojson_filter_in(c("NAT", "REG"), filter_type = "geographic_levels") +} diff --git a/man/parse_tojson_geographies.Rd b/man/parse_tojson_geographies.Rd new file mode 100644 index 0000000..6d4706a --- /dev/null +++ b/man/parse_tojson_geographies.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/post_dataset_utils.R +\name{parse_tojson_geographies} +\alias{parse_tojson_geographies} +\title{Parse geographies to json} +\usage{ +parse_tojson_geographies(geographies) +} +\arguments{ +\item{geographies}{String, vector or data frame containing the geographic levels and +locations to be queried.} +} +\value{ +String containing json form query for geographies +} +\description{ +Create a json query sub-string based on location constraints +} +\examples{ +parse_tojson_geographies(c("NAT", "REG")) |> + cat() +parse_tojson_geographies(c("NAT|id|dP0Zw", "REG|id|rg3Nj")) |> + cat() +parse_tojson_geographies(c("NAT|id|dP0Zw", "REG")) |> + cat() +parse_tojson_geographies(c("NAT|id|dP0Zw", "REG")) |> + cat() +} diff --git a/man/parse_tojson_indicators.Rd b/man/parse_tojson_indicators.Rd new file mode 100644 index 0000000..f258d5d --- /dev/null +++ b/man/parse_tojson_indicators.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/post_dataset_utils.R +\name{parse_tojson_indicators} +\alias{parse_tojson_indicators} 
+\title{Parse an indicator-in type query to json} +\usage{ +parse_tojson_indicators(indicators) +} +\arguments{ +\item{indicators}{String or vector of strings containing indicator ids} +} +\value{ +A json query string to select a set of indicators +} +\description{ +Create a json query sub-string based on indicator-in constraints +} +\examples{ +parse_tojson_indicators(example_id("indicator")) |> + cat() +} diff --git a/man/parse_tojson_location.Rd b/man/parse_tojson_location.Rd new file mode 100644 index 0000000..ec8a060 --- /dev/null +++ b/man/parse_tojson_location.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/post_dataset_utils.R +\name{parse_tojson_location} +\alias{parse_tojson_location} +\title{Create json location search string from geographies} +\usage{ +parse_tojson_location(geographies, include_comma = FALSE) +} +\arguments{ +\item{geographies}{Vector or data frame of search geographies} + +\item{include_comma}{Include a comma before return strings (logical)} +} +\value{ +Vector of strings containing json location search string +} +\description{ +Create json location search string from geographies +} +\examples{ +parse_tojson_location(example_geography_query()) |> cat() +} diff --git a/man/parse_tojson_params.Rd b/man/parse_tojson_params.Rd new file mode 100644 index 0000000..36468db --- /dev/null +++ b/man/parse_tojson_params.Rd @@ -0,0 +1,94 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/post_dataset_utils.R +\name{parse_tojson_params} +\alias{parse_tojson_params} +\title{Create a json query} +\usage{ +parse_tojson_params( + indicators, + time_periods = NULL, + geographies = NULL, + filter_items = NULL, + page = 1, + page_size = 1000, + debug = FALSE, + verbose = FALSE +) +} +\arguments{ +\item{indicators}{Indicators required as a string or vector of strings (required)} + +\item{time_periods}{Time periods required as a string ("period|code") or vector of strings} + 
+\item{geographies}{String, vector or data frame containing the geographic levels and +locations to be queried.} + +\item{filter_items}{Filter items required as a string or vector of strings} + +\item{page}{Page number of query results to return} + +\item{page_size}{Number of results to return in a single query} + +\item{debug}{Run POST query in debug mode: logical, default: FALSE} + +\item{verbose}{Run with additional contextual messaging, logical, default = FALSE} +} +\value{ +String containing json query body for use with http POST request +} +\description{ +Creates a json query for use when POST-ing a query to the API. This takes time period, +geography, filter item and indicator criteria and produces a working json query as a +single string. The result can be used directly by post_dataset() or the output of +\code{parse_tojson_params(...) |> cat()} can be copied and pasted as the "body" content in +other API connection software (such as Postman) to POST a query to the EES API. +} +\examples{ +parse_tojson_params(example_id("indicator")) |> + cat() + +parse_tojson_params( + example_id("indicator"), + time_periods = "2024|W23", + geographies = c("NAT|id|dP0Zw", "REG|id|rg3Nj"), + filter_items = c("pmRSo", "7SdXo") +) |> + cat() + +# Create a geographies data frame to find both of: +# - England national level data +# - all LAs in a specified region ("E12000004") +dfgeographies <- data.frame( + return_level = c("NAT", "LA"), + search_level = c("NAT", "REG"), + identifier_type = c("code", "code"), + identifier = c("E92000001", "E12000004") +) + +parse_tojson_params( + example_id("indicator"), + time_periods = "2024|W23", + geographies = dfgeographies, + filter_items = c("pmRSo") +) |> + cat() + +# Create a filter list to find the combination of: +# - day_number is in c("uLQo4", "qf0jG", "aMjLP") *and* +# - reason is in c("bBrtT", "ThjPJ", "hsHyW", "m2m9K") *and* +# - education_phase is in c("5UNdi", "crH31") +filter_list <- list( + day_number = c("uLQo4", "qf0jG",
"aMjLP"), + reason = c("bBrtT", "ThjPJ", "hsHyW", "m2m9K"), + education_phase = c("5UNdi", "crH31") +) + +parse_tojson_params( + example_id("indicator"), + time_periods = "2024|W23", + geographies = "NAT|code|E92000001", + filter_items = filter_list +) |> + cat() + +} diff --git a/man/parse_tojson_time_periods.Rd b/man/parse_tojson_time_periods.Rd new file mode 100644 index 0000000..3f396b3 --- /dev/null +++ b/man/parse_tojson_time_periods.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/post_dataset_utils.R +\name{parse_tojson_time_periods} +\alias{parse_tojson_time_periods} +\title{Parse time_periods to json} +\usage{ +parse_tojson_time_periods(time_periods) +} +\arguments{ +\item{time_periods}{Time periods required as a string ("period|code") or vector of strings} +} +\value{ +String containing json form query for time periods +} +\description{ +Create a json query sub-string based on time periods constraints +} +\examples{ +parse_tojson_time_periods(c("2023|W25", "2024|W12")) +} diff --git a/man/parse_filter_in.Rd b/man/parse_tourl_filter_in.Rd similarity index 65% rename from man/parse_filter_in.Rd rename to man/parse_tourl_filter_in.Rd index 4321fe3..73555e5 100644 --- a/man/parse_filter_in.Rd +++ b/man/parse_tourl_filter_in.Rd @@ -1,15 +1,15 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/api_url_query_utils.R -\name{parse_filter_in} -\alias{parse_filter_in} +\name{parse_tourl_filter_in} +\alias{parse_tourl_filter_in} \title{Create \if{html}{\out{}}.in query string for URL queries} \usage{ -parse_filter_in(items, type) +parse_tourl_filter_in(items, filter_type) } \arguments{ \item{items}{items to be included in the "in" statement} -\item{type}{type of filter items being queried: "time_periods", "geographic_levels", +\item{filter_type}{type of filter being queried: "time_periods", "geographic_levels", "locations" or "filter_items"} } \value{ @@ -20,5 +20,5 @@ Outputs a URL 
query string containing timePeriods.in=..., geographicLevels.in=.. use with querying a data set via GET. } \examples{ -parse_filter_in(c("2024|W11", "2024|W12"), type = "time_periods") +parse_tourl_filter_in(c("2024|W11", "2024|W12"), filter_type = "time_periods") } diff --git a/man/post_dataset.Rd b/man/post_dataset.Rd new file mode 100644 index 0000000..0f07b5a --- /dev/null +++ b/man/post_dataset.Rd @@ -0,0 +1,92 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/post_dataset.R +\name{post_dataset} +\alias{post_dataset} +\title{Query a data set using POST and a query json} +\usage{ +post_dataset( + dataset_id, + indicators = NULL, + time_periods = NULL, + geographies = NULL, + filter_items = NULL, + json_query = NULL, + dataset_version = NULL, + api_version = NULL, + page = NULL, + page_size = 1000, + parse = TRUE, + debug = FALSE, + verbose = FALSE +) +} +\arguments{ +\item{dataset_id}{ID of data set to be connected to. This is required if the endpoint is one +of "get-summary", "get-meta", "get-data" or "post-data"} + +\item{indicators}{Indicators required as a string or vector of strings (required)} + +\item{time_periods}{Time periods required as a string ("period|code") or vector of strings} + +\item{geographies}{String, vector or data frame containing the geographic levels and +locations to be queried.} + +\item{filter_items}{Filter items required as a string or vector of strings} + +\item{json_query}{Optional path to a json file containing the query parameters} + +\item{dataset_version}{Version of data set to be connected to} + +\item{api_version}{EES API version} + +\item{page}{Page number of query results to return} + +\item{page_size}{Number of results to return in a single query} + +\item{parse}{Logical flag to activate parsing of the results. 
Default: TRUE} + +\item{debug}{Run POST query in debug mode: logical, default: FALSE} + +\item{verbose}{Run with additional contextual messaging, logical, default = FALSE} +} +\value{ +Data frame containing query results of an API data set +} +\description{ +This function provides a method for generating and sending a json based data query to the +EES API. As a minimum, it requires the dataset_id flag and either the indicators flag or +a json file containing a query to be provided. +} +\examples{ +post_dataset( + example_id(group = "attendance"), + json_query = example_json_query() +) + +# Run post_dataset() to select rows containing either of two geographic locations and either of +# two filter items. +post_dataset( + example_id(group = "attendance"), + indicators = example_id("indicator", group = "attendance"), + time_periods = "2024|W23", + geographies = c("NAT|id|dP0Zw", "REG|id|rg3Nj"), + filter_items = c("CvuId", "6AXrf"), + page = 1, + page_size = 32 +) + +# Run post_dataset() using set parameters giving a combination of filter options +post_dataset( + example_id(group = "attendance"), + indicators = example_id("indicator", group = "attendance"), + time_periods = "2024|W23", + geographies = c("NAT|id|dP0Zw", "REG|id|rg3Nj"), + filter_items = list( + attendance_status = c("pmRSo", "7SdXo"), + attendance_type = c("CvuId", "6AXrf", "0k3T5", "YdkHK"), + education_phase = c("ThDPJ", "crH31"), + day_number = c("uLQo4"), + reason = c("bBrtT") + ) +) +} diff --git a/man/query_dataset.Rd b/man/query_dataset.Rd index 385e1d9..892fe1a 100644 --- a/man/query_dataset.Rd +++ b/man/query_dataset.Rd @@ -6,16 +6,19 @@ \usage{ query_dataset( dataset_id, - indicators, + indicators = NULL, time_periods = NULL, + geographies = NULL, geographic_levels = NULL, locations = NULL, filter_items = NULL, - method = "GET", + json_query = NULL, + method = "POST", dataset_version = NULL, api_version = NULL, page_size = 1000, page = NULL, + debug = FALSE, verbose = FALSE ) } @@ -27,13 +30,18 
@@ of "get-summary", "get-meta", "get-data" or "post-data"} \item{time_periods}{Time periods required as a string ("period|code") or vector of strings} +\item{geographies}{String, vector or data frame containing the geographic levels and +locations to be queried.} + \item{geographic_levels}{Geographic levels required as a string or vector of strings} \item{locations}{Location code required as a string or vector of strings} \item{filter_items}{Filter items required as a string or vector of strings} -\item{method}{An API query method. Needs to be "GET"} +\item{json_query}{Optional path to a json file containing the query parameters} + +\item{method}{The API query method to be used. Can be "POST" or "GET". Default: "POST".} \item{dataset_version}{Version of data set to be connected to} @@ -43,29 +51,185 @@ of "get-summary", "get-meta", "get-data" or "post-data"} \item{page}{Page number of query results to return} -\item{verbose}{Add extra contextual information whilst running} +\item{debug}{Run POST query in debug mode: logical, default: FALSE} + +\item{verbose}{Run with additional contextual messaging, logical, default = FALSE} } \value{ Data frame containing query results } \description{ -Create and send a query to the EES API. Queries can be constructed by including the -codes to the relevant flags to filter on time period, geographic level, location, -and data set specific filters. If none of the above are set in the function call, -then the entire data set will be retrieved. The data set id and specific indicators -of interest must be supplied explictly using the dataset_id and indicators params. +Create and send a query to the EES API. Queries can be supplied and run in one of 4 ways: +\itemize{ +\item Supplying a json query in a file to be sent with the POST method. +\item Supplying a json query in a string variable to be sent with the POST method. 
+\item Supplying parameters (time_periods, geographies, filter_items, indicators) to build a json +query which is then sent with the POST method. +\item Supplying parameters (time_periods, geographies, filter_items, indicators) to build a json +query which is then sent with the GET method. +} + +In all cases, the data set id must be supplied explicitly using the dataset_id. + +Details on the format of each parameter for the latter two methods are as follows. +\subsection{indicators}{ + +This must be supplied as a vector of sqids, which can be identified from the meta data using +\code{get_meta()}. +} + +\subsection{time_periods}{ + +Time periods should be supplied as a vector of periods and codes in the form: +\itemize{ +\item \code{"period|code"} +} + +For example, selecting the 2023 and 2024 academic years would require: +\code{time_period = c("2023|AY", "2024|AY")} +} + +\subsection{geographies}{ + +Geographies can be supplied as a vector or a data frame depending on the complexity of the +desired query. + +A vector will run a query returning \strong{any} rows meeting any of the given +geographies, i.e. geographies = \code{c("NAT", "REG")} will return all national and regional level +rows, whilst \code{c("NAT", "REG|code|E12000001")} will return all national level rows and all North +East rows. Specific locations are required to be supplied in the format +\code{"LEVEL|identifier_type|identifier"}, where identifier type can be either code or id and the +corresponding identifier is then the standard ONS code or the sqid given in the meta data +respectively. Using England as an example, these would be: +\itemize{ +\item \code{"NAT|code|E92000001"} +\item \code{"NAT|id|dP0Zw"} +} + +If you require a more complex selection, for example all LAs in a given region, then a data +frame should be supplied, with a row for each selection. Note however, that this will only work +when using the default \code{POST} method.
The \code{GET} method is much more limited and can not process +more complex queries. + +The geography query data frame should contain the following columns: +\itemize{ +\item return_level: the geographic level to return (e.g. LA in the example above). +\item search_level: the geographic level of the search location (e.g. REG in the example above). +\item identifier_type: "code" or "id". +\item identifier: the code or id (sqid) for the search location (e.g. the code or sqid of the +region in the above example). +} + +Further rows can be added to add other geography searches to include in results. + +An example of a working geographies data frame can be obtained using \code{example_geography_query()}. +} + +\subsection{filter_items}{ + +Similarly to geographies, criteria for querying on filter items can be provided either via a +vector for simple queries or, for more complex queries, as a list. In both cases, vector or +list, filter items can only be supplied as sqids, i.e. the ids found in the meta data using +\code{get_meta(dataset_id)}. + +Providing a vector of sqids will effectively run a query returning any rows containing any of +the listed sqids. This therefore does not allow narrow searches based on a row or set of rows +matching multiple criteria. + +Providing a list structure can provide a more narrow query selecting individual rows based on +a combination of criteria. For example if we want rows that contain sqid1 in one column and +sqid2 in another, then we would pass a list of: +\code{filter_query <- list(column1 = c("sqid1"), column2 = c("sqid2"))} +Note that the naming of the entries in the list is not necessary, but may help in creating more +readable code. 
+ +If we wish to create a query whereby we receive rows containing sqid1 in column 1 and sqid2 or +sqid3 in column 2, then the required list would be: +\code{filter_query <- list(column1 = c("sqid1"), column2 = c("sqid2", "sqid3"))} + +In this way, we can build up combinations of OR and AND criteria for a query across multiple +filter columns. + +\emph{Note again that the more complex querying using a list variable will only function when using +the \code{POST} method}. +} + +\subsection{Controlling paging}{ + +You can request a specific set of rows using the page and page_size parameters. Keeping the +default of page = NULL will return all rows matching the query. Setting page and page_size to +numerical values will attempt to return a subset of rows, with page_size defining the number of +rows and page defining which subset of rows to return from the query (i.e. page = 1, page_size = +20 will return the first 20 rows, page = 2 and page_size = 20 will return the second 20 rows +and so on). +} } \examples{ +# Run query_dataset() using a json query string input to json_query (this can also be done by +# passing a filename of a file containing your json query string). query_dataset( - example_id(), - geographic_levels = "NAT", - filter_items = example_id("filter_item"), - indicators = example_id("indicator") + example_id(group = "attendance"), + json_query = example_json_query() +) + +# If you don't want to have to write your own json query, the rest of the examples illustrate +# how to use query_dataset() with parameters to construct queries in R. + +# Run query_dataset() to select rows containing either of two geographic locations and either of +# two filter items. 
+query_dataset( + example_id(group = "attendance"), + indicators = example_id("indicator", group = "attendance"), + time_periods = "2024|W23", + geographies = c("NAT|id|dP0Zw", "REG|id|rg3Nj"), + filter_items = c("CvuId", "6AXrf"), + page = 1, + page_size = 32 ) + +# Run query_dataset() using set parameters giving a combination of filter options +query_dataset( + example_id(group = "attendance"), + indicators = example_id("indicator", group = "attendance"), + time_periods = "2024|W23", + geographies = c("NAT"), + filter_items = list( + attendance_status = c("pmRSo", "7SdXo"), + attendance_type = c("CvuId", "6AXrf", "0k3T5", "YdkHK"), + education_phase = c("ThDPJ", "crH31"), + day_number = c("uLQo4"), + reason = c("bBrtT") + ) +) + +# Run a query with a more complex geography selection. Return data for all of: +# - England +# - Yorkshire and the Humber +# - All LAs in Yorkshire and the Humber +example_geography_query("nat_yorks_yorkslas") +query_dataset( + example_id(group = "attendance"), + indicators = example_id("indicator", group = "attendance"), + time_periods = "2024|W23", + geographies = example_geography_query("nat_yorks_yorkslas"), + filter_items = list( + attendance_status = c("pmRSo"), + attendance_type = c("CvuId"), + education_phase = c("ThDPJ"), + day_number = c("uLQo4"), + reason = c("bBrtT") + ) +) + +# Run a basic query using GET instead of POST query_dataset( example_id(), + method = "GET", + geographic_levels = c("SCH"), + filter_items = example_id("filter_item"), indicators = example_id("indicator"), page = 1, page_size = 10 ) + } diff --git a/man/validate_ees_filter_type.Rd b/man/validate_ees_filter_type.Rd new file mode 100644 index 0000000..352161a --- /dev/null +++ b/man/validate_ees_filter_type.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/validation_rules.R +\name{validate_ees_filter_type} +\alias{validate_ees_filter_type} +\title{Validate filter type} +\usage{ 
+validate_ees_filter_type(filter_type) +} +\arguments{ +\item{filter_type}{type of filter being queried: "time_periods", "geographic_levels", "locations" or "filter_items"} +\description{ +Validate filter type +} +\examples{ +validate_ees_filter_type("time_periods") +} diff --git a/man/validate_ees_id.Rd b/man/validate_ees_id.Rd index 67a21c6..ac797fb 100644 --- a/man/validate_ees_id.Rd +++ b/man/validate_ees_id.Rd @@ -4,12 +4,14 @@ \alias{validate_ees_id} \title{Validate element IDs} \usage{ -validate_ees_id(element_id, level = "publication") +validate_ees_id(element_id, level = "publication", verbose = FALSE) } \arguments{ \item{element_id}{ID for publication or a data set} \item{level}{ID level: "publication", "dataset", "location", "filter_item" or "indicator"} + +\item{verbose}{Run in verbose mode} } \description{ Validate element IDs diff --git a/man/validate_time_periods.Rd b/man/validate_time_periods.Rd new file mode 100644 index 0000000..5969a68 --- /dev/null +++ b/man/validate_time_periods.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/validation_rules.R +\name{validate_time_periods} +\alias{validate_time_periods} +\title{Validate time periods} +\usage{ +validate_time_periods(time_periods) +} +\arguments{ +\item{time_periods}{Time periods required as a string ("period|code") or vector of strings} +} +\description{ +Validate time periods +} +\examples{ +validate_time_periods(c("2023|AY", "2024|AY")) +} diff --git a/tests/testthat/seed_tests.R b/tests/testthat/seed_tests.R index 9c11d19..1e0f6c8 100644 --- a/tests/testthat/seed_tests.R +++ b/tests/testthat/seed_tests.R @@ -13,13 +13,14 @@ # bulk refresh # - test data should be saved as rds files in tests/testthat/testdata/ - # Refresh all test data seed_tests <- function() { message("Updating publication list") seed_get_publications() message("Updating data catalogue list") seed_get_data_catalogue() + seed_query_dataset() + seed_post_dataset() message("Updating example meta data") 
seed_get_meta() } @@ -40,6 +41,43 @@ seed_get_data_catalogue <- function() { ) } +seed_query_dataset <- function() { + message("Updating example json-from-file data set") + result <- eesyapi::query_dataset( + eesyapi::example_id(group = "attendance"), + json_query = "tests/testthat/testdata/test_query.json" + ) + message(" * Number records = ", nrow(result)) + result |> saveRDS( + file = "tests/testthat/testdata/example_json-from-file_dataset.rds" + ) + message("Updating example json-from-string data set") + result <- eesyapi::query_dataset( + eesyapi::example_id(group = "attendance"), + json_query = eesyapi::example_json_query() + ) + message(" * Number records = ", nrow(result)) + result |> saveRDS( + file = "tests/testthat/testdata/example_json-from-string_dataset.rds" + ) +} + +seed_post_dataset <- function() { + message("Updating example data set from filter_items param selection") + result <- eesyapi::query_dataset( + eesyapi::example_id(group = "attendance"), + indicators = eesyapi::example_id("indicator", group = "attendance"), + time_periods = eesyapi::example_id("time_period", group = "attendance"), + geographies = eesyapi::example_id("location_ids", group = "attendance"), + filter_items = eesyapi::example_id("filter_items_long", group = "attendance") + ) + message(" * Number records = ", nrow(result)) + result |> + saveRDS( + file = "tests/testthat/testdata/example_post_dataset.rds" + ) +} + # Refresh the data sets list from the standard example publication seed_get_meta <- function() { saveRDS( diff --git a/tests/testthat/test-http_request_error.R b/tests/testthat/test-http_request_error.R index 5f449cf..60f710b 100644 --- a/tests/testthat/test-http_request_error.R +++ b/tests/testthat/test-http_request_error.R @@ -7,28 +7,14 @@ test_that("Successful connection message", { test_that("Bad URL / query message", { expect_error( - http_request_error(list(status = 404, errors = "Demo error")), + api_url("get-meta", dataset_id = example_id("publication")) |> 
+ httr::GET() |> + http_request_error(), paste0( "HTTP connection error: ", 404, "\n", - "Invalid query, data set ID, data set version or API version submitted to API.", - "\n", - "Demo error" - ) - ) -}) - -test_that("Server error message", { - expect_error( - http_request_error(list(status = 503)), - paste0( - "HTTP connection error: ", - 503, - "\n", - "Internal server error encountered - please contact the EES API team at ", - "explore.statistics@education.gov.uk ", - "providing the query you were attempting to submit." + "Invalid query, data set ID, data set version or API version submitted to API." ) ) }) diff --git a/tests/testthat/test-query_dataset.R b/tests/testthat/test-query_dataset.R index 986d657..b5fb042 100644 --- a/tests/testthat/test-query_dataset.R +++ b/tests/testthat/test-query_dataset.R @@ -15,3 +15,129 @@ test_that("No indicator supplied", { query_dataset(example_id()) ) }) + +test_that("Run query from file", { + query_result <- query_dataset( + example_id(group = "attendance"), + json_query = "testdata/test_query.json" + ) + query_result <- query_result |> + dplyr::arrange(dplyr::across(colnames(query_result))) + expect_equal( + query_result, + readRDS("testdata/example_json-from-file_dataset.rds") |> + dplyr::select(dplyr::all_of(colnames(query_result))) |> + dplyr::arrange(dplyr::across(colnames(query_result))) + ) +}) + +test_that("Run query from string", { + query_result <- query_dataset( + example_id(group = "attendance"), + json_query = example_json_query() + ) + query_result <- query_result |> + dplyr::arrange(dplyr::across(colnames(query_result))) + expect_equal( + query_result, + readRDS("testdata/example_json-from-string_dataset.rds") |> + dplyr::select(dplyr::all_of(colnames(query_result))) |> + dplyr::arrange(dplyr::across(colnames(query_result))) + ) +}) + +test_that("Time period query returns expected time periods", { + expect_equal( + post_dataset( + example_id(group = "attendance"), + indicators = example_id("indicator", 
group = "attendance"), + time_periods = eesyapi::example_id("time_periods", group = "attendance"), + geographies = eesyapi::example_id("location_ids", group = "attendance"), + filter_items = eesyapi::example_id("filter_item", group = "attendance") + ) |> + dplyr::select("code", "period") |> + dplyr::distinct() |> + dplyr::arrange(code, period), + data.frame( + code = c("W21", "W23"), + period = c("2024", "2024") + ) + ) +}) + +test_that("Time period query errors on badly formatted time period", { + expect_error( + post_dataset( + example_id(group = "attendance"), + indicators = example_id("indicator", group = "attendance"), + time_periods = c("2024W21", "2024|W23"), + geographies = eesyapi::example_id("location_ids", group = "attendance"), + filter_items = eesyapi::example_id("filter_item", group = "attendance") + ) + ) +}) + + +test_that("Geography query returns expected geographies", { + expect_equal( + post_dataset( + example_id(group = "attendance"), + indicators = example_id("indicator", group = "attendance"), + time_periods = eesyapi::example_id("time_period", group = "attendance"), + geographies = eesyapi::example_id("location_ids", group = "attendance"), + filter_items = eesyapi::example_id("filter_item", group = "attendance") + ) |> + dplyr::select(geographic_level, NAT, REG) |> + dplyr::distinct() |> + dplyr::arrange(geographic_level), + data.frame( + geographic_level = c("NAT", "REG"), + NAT = c("dP0Zw", "dP0Zw"), + REG = c(NA, "rg3Nj") + ) + ) +}) + +test_that("Test filter-combinations POST dataset query", { + query_result <- query_dataset( + example_id(group = "attendance"), + indicators = example_id("indicator", group = "attendance"), + time_periods = eesyapi::example_id("time_period", group = "attendance"), + geographies = eesyapi::example_id("location_ids", group = "attendance"), + filter_items = eesyapi::example_id("filter_items_long", group = "attendance") + ) + query_result <- query_result |> + 
dplyr::arrange(dplyr::across(colnames(query_result))) + expect_equal( + query_result, + readRDS("testdata/example_post_dataset.rds") |> + dplyr::select(dplyr::all_of(colnames(query_result))) |> + dplyr::arrange(dplyr::across(colnames(query_result))) + ) + query_result <- query_dataset( + example_id(group = "attendance"), + indicators = example_id("indicator", group = "attendance"), + time_periods = eesyapi::example_id("time_period", group = "attendance"), + geographies = eesyapi::example_id("location_ids", group = "attendance"), + filter_items = eesyapi::example_id("filter_items_short", group = "attendance") + ) |> + dplyr::select("5TYdi", "mU59K", "Db3Qe", "emJuS", "4kdUZ") |> + dplyr::distinct() + expect_equal( + query_result, + data.frame( + `5TYdi` = c("uLQo4", "uLQo4", "uLQo4", "uLQo4"), + `mU59K` = c("bBrtT", "bBrtT", "bBrtT", "bBrtT"), + `Db3Qe` = c("pmRSo", "pmRSo", "pmRSo", "pmRSo"), + `emJuS` = c("CvuId", "6AXrf", "CvuId", "6AXrf"), + `4kdUZ` = c("ThDPJ", "ThDPJ", "crH31", "crH31") + ) |> dplyr::rename_with(~ stringr::str_replace_all(., "X", "")) + ) +}) + +test_that("Indicators not found in data set", { + expect_error( + query_dataset(example_id(), indicators = c("uywet", "uywed")), + "\nHTTP connection error: 400\nOne or more indicators could not be found.\n uywet, uywed" + ) +}) diff --git a/tests/testthat/test-validation.R b/tests/testthat/test-validation.R new file mode 100644 index 0000000..e4f9e25 --- /dev/null +++ b/tests/testthat/test-validation.R @@ -0,0 +1,56 @@ +test_that("Location validation works", { + expect_no_error( + validate_ees_id("NAT|id|23897", level = "location") + ) + expect_no_error( + validate_ees_id(c("NAT|id|23897", "REG|code|sd897asdf"), level = "location") + ) + expect_error( + validate_ees_id("NATid23897", level = "location") + ) + expect_error( + validate_ees_id(c("NATid|23897", "REG|code|sd897"), level = "location") + ) +}) + +test_that("Time period validation", { + expect_no_error( + validate_time_periods(c("2023|AY", 
"2023|FY")) + ) + expect_no_error( + validate_time_periods("2023|AY") + ) + expect_error( + validate_time_periods(c("2023AY", "2023|FY")) + ) + expect_error( + validate_time_periods("2023AY") + ) +}) + +test_that("Filter type validation", { + expect_no_error( + validate_ees_filter_type("time_periods") + ) + expect_no_error( + validate_ees_filter_type("locations") + ) + expect_no_error( + validate_ees_filter_type("geographic_levels") + ) + expect_no_error( + validate_ees_filter_type("filter_items") + ) + expect_error( + validate_ees_filter_type("time_period") + ) + expect_error( + validate_ees_filter_type("location") + ) + expect_error( + validate_ees_filter_type("geographic") + ) + expect_error( + validate_ees_filter_type("filter_item") + ) +}) diff --git a/tests/testthat/testdata/example_json-from-file_dataset.rds b/tests/testthat/testdata/example_json-from-file_dataset.rds new file mode 100644 index 0000000..f2c9f80 Binary files /dev/null and b/tests/testthat/testdata/example_json-from-file_dataset.rds differ diff --git a/tests/testthat/testdata/example_json-from-string_dataset.rds b/tests/testthat/testdata/example_json-from-string_dataset.rds new file mode 100644 index 0000000..b92945e Binary files /dev/null and b/tests/testthat/testdata/example_json-from-string_dataset.rds differ diff --git a/tests/testthat/testdata/example_meta_parsed.rds b/tests/testthat/testdata/example_meta_parsed.rds index 11d49b9..692ca01 100644 Binary files a/tests/testthat/testdata/example_meta_parsed.rds and b/tests/testthat/testdata/example_meta_parsed.rds differ diff --git a/tests/testthat/testdata/example_meta_unparsed.rds b/tests/testthat/testdata/example_meta_unparsed.rds index 1675282..6fa1f51 100644 Binary files a/tests/testthat/testdata/example_meta_unparsed.rds and b/tests/testthat/testdata/example_meta_unparsed.rds differ diff --git a/tests/testthat/testdata/example_post_dataset.rds b/tests/testthat/testdata/example_post_dataset.rds new file mode 100644 index 0000000..18b9888 
Binary files /dev/null and b/tests/testthat/testdata/example_post_dataset.rds differ diff --git a/tests/testthat/testdata/example_publication_catalogue.rds b/tests/testthat/testdata/example_publication_catalogue.rds index c3efc29..feef31d 100644 Binary files a/tests/testthat/testdata/example_publication_catalogue.rds and b/tests/testthat/testdata/example_publication_catalogue.rds differ diff --git a/tests/testthat/testdata/example_publication_datasets.rds b/tests/testthat/testdata/example_publication_datasets.rds index f815280..b0142a9 100644 Binary files a/tests/testthat/testdata/example_publication_datasets.rds and b/tests/testthat/testdata/example_publication_datasets.rds differ diff --git a/tests/testthat/testdata/test_query.json b/tests/testthat/testdata/test_query.json new file mode 100644 index 0000000..6179319 --- /dev/null +++ b/tests/testthat/testdata/test_query.json @@ -0,0 +1,30 @@ +{ + "criteria": { + "and": [ + { + "geographicLevels": { + "eq": "NAT" + } + },{ + "timePeriods": { + "in": [ + { + "period": "2024", + "code": "W26" + } + ] + } + }, { + "filters": { + "eq": "jYyAM" + } + } + ] + }, + "indicators": [ + "bqZtT" + ], + "debug": false, + "page": 1, + "pageSize": 64 +}