diff --git a/DESCRIPTION b/DESCRIPTION index dedcdf2d..fb47dc3e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -8,6 +8,8 @@ Authors@R: c( role = "aut", comment = c(ORCID = "0000-0003-3445-7562")), person("Pieter", "Huybrechts", email = "pieter.huybrechts@inbo.be", role = "aut", comment = c(ORCID = "0000-0002-6658-6062")), + person("Kyle", "Husmann", email = "kdh38@psu.edu", + role = "ctb", comment = c(ORCID = "0000-0001-9875-8976")), person("Research Institute for Nature and Forest (INBO)", role = "cph", comment = "https://www.vlaanderen.be/inbo/en-gb/"), person("LifeWatch Belgium", @@ -51,4 +53,4 @@ Encoding: UTF-8 Language: en-GB LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.0 +RoxygenNote: 7.3.1 diff --git a/NAMESPACE b/NAMESPACE index e9020289..e9e26a5d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +S3method(print,datapackage) export(add_resource) export(check_package) export(create_package) diff --git a/NEWS.md b/NEWS.md index 096e0007..c5b22d76 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,11 @@ # frictionless (development version) -* `add_resource()` now supports adding additional resource properties via the `...` argument. +* A Data Package object (`package`) now has a `datapackage` class (#184). As a result: + * New function `print()` prints a human-readable summary of the Data Package (#155). + * `check_package()` will warn if the class is missing. +* `read_package()` no longer returns a message regarding rights and credit (#121). If `package$id` is a URL (e.g. a DOI) it will be mentioned in `print()`. * `read_resource()` now supports column selection via the `col_select` argument from `readr::read_delim()`. This can vastly improve reading speed (#123). +* `add_resource()` now supports adding additional resource properties via the `...` argument. * `create_package()` now accepts a `descriptor` argument so that a Data Package object can be created from an existing object (#184).
It will always validate the created object with `create_package()`. * `check_package()` is now a public function, so it can be used by other packages (#185). * `readr::problems()` is included in NAMESPACE so users don't have to load readr to inspect parsing issues. The function is mentioned in the documentation of `read_resource()` (#129). @@ -9,7 +13,6 @@ * Messages use semantic colours for variables, parameters, fields, etc. * Messages and warnings can be silenced with a global or local option, see [this blog post](https://ropensci.org/blog/2024/02/06/verbosity-control-packages/). * Each call has an [rlang](https://cran.r-project.org/package=rlang) class, e.g. `frictionless_error_fields_without_name`, making it easier to test for specific errors. -* A `package` object now has a `datapackage` class (#184), `check_package()` will warn if it is missing. * The dependencies [glue](https://cran.r-project.org/package=glue) and [assertthat](https://cran.r-project.org/package=assertthat) are removed (#163). The functionality of glue is replaced by cli, `assertthat::assert()` calls are now `if()` functions. * Adhere to the requirements of [checklist](https://github.com/inbo/checklist), so that `.zenodo.json` can be created with `checklist::update_citation()`. * Add [Pieter Huybrechts](https://orcid.org/0000-0002-6658-6062) as author. Welcome Pieter! diff --git a/R/add_resource.R b/R/add_resource.R index d659ccf1..9b577e0e 100644 --- a/R/add_resource.R +++ b/R/add_resource.R @@ -192,5 +192,5 @@ add_resource <- function(package, resource_name, data, schema = NULL, # Add resource (needs to be wrapped in its own list) package$resources <- append(package$resources, list(resource)) - package + return(package) } diff --git a/R/check_package.R b/R/check_package.R index eec6f5fe..3c59e351 100644 --- a/R/check_package.R +++ b/R/check_package.R @@ -6,6 +6,12 @@ #' @return `TRUE` or error. 
#' @family check functions #' @export +#' @examples +#' # Load the example Data Package +#' package <- example_package +#' +#' # Check if the Data Package is valid (invisible return) +#' check_package(package) check_package <- function(package) { general_message <- "{.arg package} must be a Data Package object." tip_message <- paste( diff --git a/R/check_path.R b/R/check_path.R index 6c984186..4f9dcf8d 100644 --- a/R/check_path.R +++ b/R/check_path.R @@ -62,5 +62,6 @@ check_path <- function(path, directory = NULL, safe = FALSE) { ) } } + return(path) } diff --git a/R/create_package.R b/R/create_package.R index 5c1e530e..a6fc267f 100644 --- a/R/create_package.R +++ b/R/create_package.R @@ -15,13 +15,17 @@ #' it is valid. #' #' @param descriptor List to be made into a Data Package object. -#' If `NULL`, an empty Data Package object will be created from scratch. +#' If undefined, an empty Data Package will be created from scratch. #' @return Data Package object. #' @family create functions #' @export #' @examples #' # Create a Data Package #' package <- create_package() +#' +#' package +#' +#' # See the structure of the (empty) Data Package #' str(package) create_package <- function(descriptor = NULL) { if (!is.null(descriptor) && !is.list(descriptor)) { @@ -42,6 +46,6 @@ create_package <- function(descriptor = NULL) { # Check that created package is valid check_package(descriptor) - + return(descriptor) } diff --git a/R/create_schema.R b/R/create_schema.R index ef359204..1f6a17a5 100644 --- a/R/create_schema.R +++ b/R/create_schema.R @@ -124,5 +124,5 @@ create_schema <- function(data) { recursive = TRUE ) - schema + return(schema) } diff --git a/R/get_resource.R b/R/get_resource.R index 1e1789ea..ce0edc2d 100644 --- a/R/get_resource.R +++ b/R/get_resource.R @@ -65,5 +65,5 @@ get_resource <- function(package, resource_name) { resource$read_from <- "data" } - resource + return(resource) } diff --git a/R/get_schema.R b/R/get_schema.R index a21c8e84..eafbe141 100644 --- 
a/R/get_schema.R +++ b/R/get_schema.R @@ -43,5 +43,5 @@ get_schema <- function(package, resource_name) { # Check schema check_schema(schema) - schema + return(schema) } diff --git a/R/print.R b/R/print.R new file mode 100644 index 00000000..000bf125 --- /dev/null +++ b/R/print.R @@ -0,0 +1,48 @@ +#' Print a Data Package +#' +#' Prints a human-readable summary of a Data Package, including its resources +#' and a link to more information (if provided in `package$id`). +#' +#' @param x Data Package object, created with [read_package()] or +#' [create_package()]. +#' @param ... Further arguments, they are ignored by this function. +#' @return [print()] with a summary of the Data Package object. +#' @family datapackage functions +#' @export +#' @examples +#' # Load the example Data Package +#' package <- example_package +#' +#' # Print a summary of the Data Package +#' package # Or print(package) +print.datapackage <- function(x, ...) { + # All prints should use cat (= cli::cat() helpers) + + # List resources + resources <- resources(x) + cli::cat_line( + cli::format_inline( + "A Data Package with {length(resources)} resource{?s}{?./:/:}" + ) + ) + if (length(resources) > 0) { + cli::cat_bullet(resources, bullet = "bullet") + } + + # Include link (DOI) if available in package$id + if (startsWith(replace_null(x$id, ""), "http")) { + cli::cat_line( + cli::format_inline("For more information, see {.url {x$id}}.") + ) + } + + # Provide help + cli::cat_line( + cli::format_inline( + "Use {.fun unclass} to print the Data Package as a list." 
+ ), + col = "silver" + ) + + invisible(x) +} diff --git a/R/read_package.R b/R/read_package.R index 7b6b7e21..76159e23 100644 --- a/R/read_package.R +++ b/R/read_package.R @@ -14,6 +14,8 @@ #' system.file("extdata", "datapackage.json", package = "frictionless") #' ) #' +#' package +#' #' # Access the Data Package properties #' package$name #' package$created @@ -44,22 +46,6 @@ read_package <- function(file = "datapackage.json") { # Add directory descriptor$directory <- dirname(file) # Also works for URLs - # Inform user regarding rights and citation - message <- c( - "Please make sure you have the right to access data from this Data Package - for your intended use.", - "Follow applicable norms or requirements to credit the dataset and its - authors." - ) - if (!is.null(descriptor$id)) { - if (startsWith(descriptor$id, "http")) { - message <- c( - message, - "i" = "For more information, see {.url {descriptor$id}}." - ) - } - } - cli::cli_inform(message, class = "frictionless_message_usage_rights") - + # Create package create_package(descriptor) } diff --git a/R/read_resource.R b/R/read_resource.R index 8ae05037..d101f0ad 100644 --- a/R/read_resource.R +++ b/R/read_resource.R @@ -184,8 +184,7 @@ #' system.file("extdata", "datapackage.json", package = "frictionless") #' ) #' -#' # List resources -#' resources(package) +#' package #' #' # Read data from the resource "observations" #' read_resource(package, "observations") diff --git a/R/remove_resource.R b/R/remove_resource.R index bcd76d73..8e648b06 100644 --- a/R/remove_resource.R +++ b/R/remove_resource.R @@ -28,5 +28,5 @@ remove_resource <- function(package, resource_name) { (x$name == resource_name) }) - package + return(package) } diff --git a/R/utils.R b/R/utils.R index dd8a15e9..4938fb5e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -86,5 +86,6 @@ read_descriptor <- function(x, directory = NULL, safe = FALSE) { x <- jsonlite::fromJSON(x, simplifyDataFrame = FALSE, simplifyVector = TRUE) } } + return(x) } diff 
--git a/R/write_package.R b/R/write_package.R index 3468543c..363f648a 100644 --- a/R/write_package.R +++ b/R/write_package.R @@ -27,8 +27,7 @@ #' system.file("extdata", "datapackage.json", package = "frictionless") #' ) #' -#' # List resources -#' resources(package) +#' package #' #' # Write the (unchanged) Data Package to disk #' write_package(package, directory = "my_directory") diff --git a/README.Rmd b/README.Rmd index c5494b37..c60abb0b 100644 --- a/README.Rmd +++ b/README.Rmd @@ -63,6 +63,8 @@ library(frictionless) # reading them, which is convenient and fast. package <- read_package("https://zenodo.org/record/5879096/files/datapackage.json") +package + # List resources resources(package) @@ -80,6 +82,8 @@ my_package <- create_package() %>% add_resource(resource_name = "iris", data = iris) +my_package + # Write the Data Package to disk my_package %>% write_package("my_directory") diff --git a/README.md b/README.md index 6aa23ae1..541d82ac 100644 --- a/README.md +++ b/README.md @@ -67,10 +67,14 @@ library(frictionless) # This gives you access to all Data Resources of the Data Package without # reading them, which is convenient and fast. package <- read_package("https://zenodo.org/record/5879096/files/datapackage.json") -#> Please make sure you have the right to access data from this Data Package for -#> your intended use. -#> Follow applicable norms or requirements to credit the dataset and its authors. -#> ℹ For more information, see <https://doi.org/10.5281/zenodo.5879096>. + +package +#> A Data Package with 3 resources: +#> • reference-data +#> • gps +#> • acceleration +#> For more information, see <https://doi.org/10.5281/zenodo.5879096>. +#> Use `unclass()` to print the Data Package as a list. # List resources resources(package) @@ -111,6 +115,11 @@ my_package <- create_package() %>% add_resource(resource_name = "iris", data = iris) +my_package +#> A Data Package with 1 resource: +#> • iris +#> Use `unclass()` to print the Data Package as a list.
+ # Write the Data Package to disk my_package %>% write_package("my_directory") diff --git a/_pkgdown.yml b/_pkgdown.yml index 6d4547d3..1a02bf30 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -30,4 +30,5 @@ reference: - has_concept("write functions") - title: "Miscellaneous" contents: + - has_concept("datapackage functions") - example_package diff --git a/man/check_package.Rd b/man/check_package.Rd index 3d71bb8b..f4ba0903 100644 --- a/man/check_package.Rd +++ b/man/check_package.Rd @@ -16,4 +16,11 @@ check_package(package) \description{ Check if an object is a Data Package object with the required properties. } +\examples{ +# Load the example Data Package +package <- example_package + +# Check if the Data Package is valid (invisible return) +check_package(package) +} \concept{check functions} diff --git a/man/create_package.Rd b/man/create_package.Rd index a34ff72f..15a15e2d 100644 --- a/man/create_package.Rd +++ b/man/create_package.Rd @@ -8,7 +8,7 @@ create_package(descriptor = NULL) } \arguments{ \item{descriptor}{List to be made into a Data Package object. -If \code{NULL}, an empty Data Package object will be created from scratch.} +If undefined, an empty Data Package will be created from scratch.} } \value{ Data Package object. @@ -33,6 +33,10 @@ it is valid. \examples{ # Create a Data Package package <- create_package() + +package + +# See the structure of the (empty) Data Package str(package) } \seealso{ diff --git a/man/print.datapackage.Rd b/man/print.datapackage.Rd new file mode 100644 index 00000000..09ad0676 --- /dev/null +++ b/man/print.datapackage.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/print.R +\name{print.datapackage} +\alias{print.datapackage} +\title{Print a Data Package} +\usage{ +\method{print}{datapackage}(x, ...) 
+} +\arguments{ +\item{x}{Data Package object, created with \code{\link[=read_package]{read_package()}} or +\code{\link[=create_package]{create_package()}}.} + +\item{...}{Further arguments, they are ignored by this function.} +} +\value{ +\code{\link[=print]{print()}} with a summary of the Data Package object. +} +\description{ +Prints a human-readable summary of a Data Package, including its resources +and a link to more information (if provided in \code{package$id}). +} +\examples{ +# Load the example Data Package +package <- example_package + +# Print a summary of the Data Package +package # Or print(package) +} +\concept{datapackage functions} diff --git a/man/read_package.Rd b/man/read_package.Rd index 168861d0..92adbfda 100644 --- a/man/read_package.Rd +++ b/man/read_package.Rd @@ -23,6 +23,8 @@ package <- read_package( system.file("extdata", "datapackage.json", package = "frictionless") ) +package + # Access the Data Package properties package$name package$created diff --git a/man/read_resource.Rd b/man/read_resource.Rd index f37083d9..ec9c34a6 100644 --- a/man/read_resource.Rd +++ b/man/read_resource.Rd @@ -209,8 +209,7 @@ package <- read_package( system.file("extdata", "datapackage.json", package = "frictionless") ) -# List resources -resources(package) +package # Read data from the resource "observations" read_resource(package, "observations") diff --git a/man/write_package.Rd b/man/write_package.Rd index 93d2f1cf..4b1b2280 100644 --- a/man/write_package.Rd +++ b/man/write_package.Rd @@ -42,8 +42,7 @@ package <- read_package( system.file("extdata", "datapackage.json", package = "frictionless") ) -# List resources -resources(package) +package # Write the (unchanged) Data Package to disk write_package(package, directory = "my_directory") diff --git a/tests/testthat/data/valid_minimal_extra.json b/tests/testthat/data/valid_minimal_extra.json deleted file mode 100644 index e32539b7..00000000 --- a/tests/testthat/data/valid_minimal_extra.json +++ /dev/null @@ 
-1,14 +0,0 @@ -{ - "id": "https://example.com", - "resources": [ - { - "name": "deployments" - }, - { - "name": "observations" - }, - { - "name": "media" - } - ] -} diff --git a/tests/testthat/test-print.R b/tests/testthat/test-print.R new file mode 100644 index 00000000..47625a63 --- /dev/null +++ b/tests/testthat/test-print.R @@ -0,0 +1,80 @@ +test_that("print() returns output invisibly", { + expect_output(output <- withVisible(print(example_package))) + expect_false(output$visible) +}) + +test_that("print() informs about the resources and unclass()", { + unclass_message <- "Use `unclass()` to print the Data Package as a list." + + # 3 resources (example package) + p <- example_package + expect_output( + print(p), + regexp = paste( + "A Data Package with 3 resources:", + "* deployments", + "* observations", + "* media", + unclass_message, + sep = "\n" + ), + fixed = TRUE + ) + + # 1 resource + p1 <- create_package() + df <- data.frame("col_1" = c(1, 2), "col_2" = c("a", "b")) + p1 <- add_resource(p1, "new", df) + expect_output( + print(p1), + regexp = paste( + "A Data Package with 1 resource:", + "* new", + unclass_message, + sep = "\n" + ), + fixed = TRUE + ) + + # 0 resources + p0 <- create_package() + expect_output( + print(p0), + regexp = paste( + "A Data Package with 0 resources.", + unclass_message, + sep = "\n" + ), + fixed = TRUE + ) +}) + +test_that("print() informs about information in package$id", { + unclass_message <- "Use `unclass()` to print the Data Package as a list." 
+ + # package$id is a URL, inform + p <- create_package() + p$id <- "https://example.com" + expect_output( + print(p), + regexp = paste( + "A Data Package with 0 resources.", + "For more information, see <https://example.com>.", + unclass_message, + sep = "\n" + ), + fixed = TRUE + ) + + # package$id is not a URL, don't inform + p$id <- "not_a_url" + expect_output( + print(p), + regexp = paste( + "A Data Package with 0 resources.", + unclass_message, + sep = "\n" + ), + fixed = TRUE + ) +}) diff --git a/tests/testthat/test-read_package.R b/tests/testthat/test-read_package.R index e45e9340..644d31f5 100644 --- a/tests/testthat/test-read_package.R +++ b/tests/testthat/test-read_package.R @@ -2,8 +2,8 @@ test_that("read_package() returns a valid Data Package reading from path", { # Load example package locally and a valid minimal one p_path <- system.file("extdata", "datapackage.json", package = "frictionless") minimal_path <- test_path("data/valid_minimal.json") - p_local <- suppressMessages(read_package(p_path)) - p_minimal <- suppressMessages(read_package(minimal_path)) + p_local <- read_package(p_path) + p_minimal <- read_package(minimal_path) # Returns a list with required properties expect_true(check_package(p_local)) @@ -24,7 +24,7 @@ test_that("read_package() returns a valid Data Package reading from url", { # Load example package remotely p_url <- file.path("https://raw.githubusercontent.com/frictionlessdata/", "frictionless-r/main/inst/extdata/datapackage.json") - p_remote <- suppressMessages(read_package(p_url)) + p_remote <- read_package(p_url) # Returns a list with required properties expect_true(check_package(p_remote)) @@ -37,30 +37,6 @@ test_that("read_package() returns a valid Data Package reading from url", { expect_identical(p_remote$directory, gsub("/datapackage.json", "", p_url)) }) -test_that("read_package() shows message about rights and citation", { - # Load example package and a minimal valid one a URL in "id" - p_path <- system.file("extdata", "datapackage.json",
package = "frictionless") - minimal_extra_path <- test_path("data/valid_minimal_extra.json") - expect_message( - read_package(p_path), - class = "frictionless_message_usage_rights" - ) - expect_message( - read_package(p_path), - regexp = paste( - "Please make sure you have the right to access data from this Data", - "Package for your intended use.\nFollow applicable norms or requirements", - "to credit the dataset and its authors." - ), - fixed = TRUE - ) - expect_message( - read_package(minimal_extra_path), - regexp = "For more information, see .", - fixed = TRUE - ) -}) - test_that("read_package() returns error on missing file and properties", { skip_if_offline() # Incorrect type @@ -120,18 +96,14 @@ test_that("read_package() allows descriptor at absolute or relative parent path", { relative_path <- "../testthat/data/valid_minimal.json" expect_true( - check_package(suppressMessages(read_package(relative_path))) + check_package(read_package(relative_path)) ) absolute_path <- normalizePath("data/valid_minimal.json") expect_true( - check_package(suppressMessages(read_package(absolute_path))) + check_package(read_package(absolute_path)) ) }) test_that("read_package() allows YAML descriptor", { - expect_true( - check_package( - suppressMessages(read_package(test_path("data/valid_minimal.yml"))) - ) - ) + expect_true(check_package(read_package(test_path("data/valid_minimal.yml")))) }) diff --git a/tests/testthat/test-read_resource.R b/tests/testthat/test-read_resource.R index f991270b..8d6faec0 100644 --- a/tests/testthat/test-read_resource.R +++ b/tests/testthat/test-read_resource.R @@ -268,9 +268,9 @@ test_that("read_resource() can read local files", { p <- example_package resource <- read_resource(p, "deployments") # local resource, remote package - p_local <- suppressMessages(read_package( + p_local <- read_package( system.file("extdata", "datapackage.json", package = "frictionless") - )) + ) expect_identical(read_resource(p_local, "deployments"), resource) }) @@ 
-457,7 +457,7 @@ test_that("read_resource() understands encoding", { test_that("read_resource() handles decimalChar/groupChar properties", { expected_value <- 3000000.3 - p <- suppressMessages(read_package(test_path("data/mark.json"))) + p <- read_package(test_path("data/mark.json")) # Default decimalChar/groupChar resource <- read_resource(p, "mark_default") @@ -578,7 +578,7 @@ test_that("read_resource() can read compressed files", { }) test_that("read_resource() handles strings", { - p <- suppressMessages(read_package(test_path("data/types.json"))) + p <- read_package(test_path("data/types.json")) resource <- read_resource(p, "string") expect_type(resource$str, "character") @@ -589,7 +589,7 @@ test_that("read_resource() handles strings", { }) test_that("read_resource() handles numbers", { - p <- suppressMessages(read_package(test_path("data/types.json"))) + p <- read_package(test_path("data/types.json")) resource <- read_resource(p, "number") # Leading/trailing zeros are optional, + is assumed @@ -622,7 +622,7 @@ test_that("read_resource() handles numbers", { }) test_that("read_resource() handles integers (as doubles)", { - p <- suppressMessages(read_package(test_path("data/types.json"))) + p <- read_package(test_path("data/types.json")) resource <- read_resource(p, "integer") # Leading/trailing zeros are optional, + is assumed @@ -644,7 +644,7 @@ test_that("read_resource() handles integers (as doubles)", { }) test_that("read_resource() handles booleans", { - p <- suppressMessages(read_package(test_path("data/types.json"))) + p <- read_package(test_path("data/types.json")) resource <- read_resource(p, "boolean") # Default trueValues/falseValues are cast to logical @@ -656,7 +656,7 @@ test_that("read_resource() handles booleans", { test_that("read_resource() handles dates", { expected_value <- as.Date("2013-11-23") - p <- suppressMessages(read_package(test_path("data/types.json"))) + p <- read_package(test_path("data/types.json")) resource <- read_resource(p, 
"date") # This test covers: # - year: %Y %y @@ -677,7 +677,7 @@ test_that("read_resource() handles dates", { test_that("read_resource() handles times", { expected_value <- hms::hms(0, 30, 8) # "08:30:00" - p <- suppressMessages(read_package(test_path("data/types.json"))) + p <- read_package(test_path("data/types.json")) resource <- read_resource(p, "time") # This test covers: # - hour: %H (including 1 digit) %I + %p @@ -701,7 +701,7 @@ test_that("read_resource() handles times", { test_that("read_resource() handles datetimes", { expected_value <- as.POSIXct("2013-11-23 08:30:00", tz = "UTC") - p <- suppressMessages(read_package(test_path("data/types.json"))) + p <- read_package(test_path("data/types.json")) resource <- read_resource(p, "datetime") expect_identical(resource$dttm_undefined, resource$dttm_default) @@ -716,7 +716,7 @@ test_that("read_resource() handles datetimes", { }) test_that("read_resource() handles other types", { - p <- suppressMessages(read_package(test_path("data/types.json"))) + p <- read_package(test_path("data/types.json")) resource <- read_resource(p, "other") # Interpret year, yearmonth as dates diff --git a/tests/testthat/test-write_package.R b/tests/testthat/test-write_package.R index 4eb8502b..ef20c59f 100644 --- a/tests/testthat/test-write_package.R +++ b/tests/testthat/test-write_package.R @@ -5,9 +5,7 @@ test_that("write_package() returns output Data Package (invisibly)", { dir <- file.path(tempdir(), "package") on.exit(unlink(dir, recursive = TRUE)) p_written <- suppressMessages(write_package(p, dir)) - p_from_file <- suppressMessages(read_package( - file.path(dir, "datapackage.json") - )) + p_from_file <- read_package(file.path(dir, "datapackage.json")) # p_from_file$directory will differ: overwrite to make the same p_from_file$directory <- p_written$directory @@ -50,7 +48,7 @@ test_that("write_package() writes unaltered datapackage.json as is", { p_file <- system.file("extdata", "datapackage.json", package = "frictionless") 
json_original <- readr::read_lines(p_file) # Will use line endings of system - p <- suppressMessages(read_package(p_file)) + p <- read_package(p_file) dir <- file.path(tempdir(), "package") on.exit(unlink(dir, recursive = TRUE)) suppressMessages(write_package(p, dir)) @@ -63,9 +61,9 @@ test_that("write_package() writes unaltered datapackage.json as is", { test_that("write_package() does not overwrite existing data files", { skip_if_offline() - p <- suppressMessages(read_package( + p <- read_package( system.file("extdata", "datapackage.json", package = "frictionless") - )) + ) dir <- file.path(tempdir(), "package") on.exit(unlink(dir, recursive = TRUE)) dir.create(dir) @@ -86,9 +84,9 @@ test_that("write_package() does not overwrite existing data files", { test_that("write_package() copies file(s) for path = local in local package", { skip_if_offline() - p <- suppressMessages(read_package( + p <- read_package( system.file("extdata", "datapackage.json", package = "frictionless") - )) + ) p$resources[[2]]$path[[2]] <- "observations_2.csv" # Make one URL a local path p <- add_resource(p, "new", test_path("data/df.csv")) dir <- file.path(tempdir(), "package") @@ -141,9 +139,9 @@ test_that("write_package() downloads file(s) for path = local in remote test_that("write_package() leaves as is for path = URL in local package", { skip_if_offline() - p <- suppressMessages(read_package( + p <- read_package( system.file("extdata", "datapackage.json", package = "frictionless") - )) + ) p <- add_resource(p, "new", file.path( "https://raw.githubusercontent.com/frictionlessdata/frictionless-r", "main/tests/testthat/data/df.csv" @@ -185,9 +183,9 @@ test_that("write_package() leaves as is for path = URL in remote package", { test_that("write_package() leaves as is for data = json in local package", { skip_if_offline() - p <- suppressMessages(read_package( + p <- read_package( system.file("extdata", "datapackage.json", package = "frictionless") - )) + ) dir <- file.path(tempdir(), 
"package") on.exit(unlink(dir, recursive = TRUE)) p_written <- suppressMessages(write_package(p, dir)) @@ -215,9 +213,9 @@ test_that("write_package() leaves as is for data = json in remote package", { test_that("write_package() creates file for data = df in local package", { skip_if_offline() - p <- suppressMessages(read_package( + p <- read_package( system.file("extdata", "datapackage.json", package = "frictionless") - )) + ) df <- data.frame("col_1" = c(1, 2), "col_2" = c("a", "b")) p <- add_resource(p, "new", df) dir <- file.path(tempdir(), "package") @@ -316,6 +314,6 @@ test_that("write_package() will gzip file for compress = TRUE", { expect_false(file.exists(file.path(dir, "new.csv"))) # Written file can be read by read_resource() - p_reread <- suppressMessages(read_package(file.path(dir, "datapackage.json"))) + p_reread <- read_package(file.path(dir, "datapackage.json")) expect_identical(read_resource(p_reread, "new"), dplyr::as_tibble(df)) }) diff --git a/vignettes/frictionless.Rmd b/vignettes/frictionless.Rmd index b3c8c417..aab6f5f2 100644 --- a/vignettes/frictionless.Rmd +++ b/vignettes/frictionless.Rmd @@ -32,14 +32,23 @@ library(frictionless) To read a Data Package, you need to know the path or URL to its descriptor file, named `datapackage.json`. That file describes the Data Package, provides access points to its Data Resources and can contain dataset-level metadata. Let's read a Data Package descriptor file published on [Zenodo](https://doi.org/10.5281/zenodo.5879096): -``` r +```r package <- read_package("https://zenodo.org/record/5879096/files/datapackage.json") -#> Please make sure you have the right to access data from this Data Package for your intended use. -#> Follow applicable norms or requirements to credit the dataset and its authors. 
-#> For more information, see https://doi.org/10.5281/zenodo.5879096 ``` -`read_package()` returns the content of `datapackage.json` as a list, printed here with `str()` to improve readability: +`read_package()` returns the content of `datapackage.json` as a list with class `datapackage`. When printing a Data Package, you get a summary of its contents: + +```r +package +#> A Data Package with 3 resources: +#> • reference-data +#> • gps +#> • acceleration +#> For more information, see . +#> Use `unclass()` to print the Data Package as a list. +``` + +Since a Data Package is a list, you can pass it to functions that work on lists, such as `str()`: ```r str(package, list.len = 3) @@ -75,48 +84,54 @@ resources(package) This Data Package has 3 resources. Let's read the data from the `gps` resource into a data frame: ```r +package <- read_package("https://zenodo.org/record/5879096/files/datapackage.json") gps <- read_resource(package, "gps") gps #> # A tibble: 73,047 × 21 -#> event-i…¹ visible timestamp locat…² locat…³ bar:b…⁴ exter…⁵ gps:d…⁶ -#> -#> 1 1.43e10 TRUE 2018-05-25 16:11:37 4.25 51.3 NA 32.5 2 -#> 2 1.43e10 TRUE 2018-05-25 16:16:41 4.25 51.3 NA 32.8 2.1 -#> 3 1.43e10 TRUE 2018-05-25 16:21:29 4.25 51.3 NA 34.1 2.1 -#> 4 1.43e10 TRUE 2018-05-25 16:26:28 4.25 51.3 NA 34.5 2.2 -#> 5 1.43e10 TRUE 2018-05-25 16:31:21 4.25 51.3 NA 34.1 2.2 -#> 6 1.43e10 TRUE 2018-05-25 16:36:09 4.25 51.3 NA 32.5 2.2 -#> 7 1.43e10 TRUE 2018-05-25 16:40:57 4.25 51.3 NA 32.1 2.2 -#> 8 1.43e10 TRUE 2018-05-25 16:45:55 4.25 51.3 NA 33.3 2.1 -#> 9 1.43e10 TRUE 2018-05-25 16:50:49 4.25 51.3 NA 32.6 2.1 -#> 10 1.43e10 TRUE 2018-05-25 16:55:36 4.25 51.3 NA 31.7 2 -#> # … with 73,037 more rows, 13 more variables: `gps:satellite-count` , +#> `event-id` visible timestamp `location-long` `location-lat` +#> +#> 1 14256075762 TRUE 2018-05-25 16:11:37 4.25 51.3 +#> 2 14256075763 TRUE 2018-05-25 16:16:41 4.25 51.3 +#> 3 14256075764 TRUE 2018-05-25 16:21:29 4.25 51.3 +#> 4 14256075765 TRUE 2018-05-25 
16:26:28 4.25 51.3 +#> 5 14256075766 TRUE 2018-05-25 16:31:21 4.25 51.3 +#> 6 14256075767 TRUE 2018-05-25 16:36:09 4.25 51.3 +#> 7 14256075768 TRUE 2018-05-25 16:40:57 4.25 51.3 +#> 8 14256075769 TRUE 2018-05-25 16:45:55 4.25 51.3 +#> 9 14256075770 TRUE 2018-05-25 16:50:49 4.25 51.3 +#> 10 14256075771 TRUE 2018-05-25 16:55:36 4.25 51.3 +#> # ℹ 73,037 more rows +#> # ℹ 16 more variables: `bar:barometric-pressure` , +#> # `external-temperature` , `gps:dop` , `gps:satellite-count` , #> # `gps-time-to-fix` , `ground-speed` , heading , #> # `height-above-msl` , `location-error-numerical` , #> # `manually-marked-outlier` , `vertical-error-numerical` , -#> # `sensor-type` , `individual-taxon-canonical-name` , -#> # `tag-local-identifier` , `individual-local-identifier` , -#> # `study-name` , and abbreviated variable names ¹​`event-id`, … +#> # `sensor-type` , `individual-taxon-canonical-name` , … ``` The data frame contains all GPS records, even though the actual data were split over [multiple CSV zipped files](https://zenodo.org/record/5879096#files). `read_resource()` assigned the column names and types based on the Table Schema that was defined for that resource, not the headers of the CSV file. You can also read data from a local (e.g. downloaded) Data Package. In fact, there is one included in the frictionless pkg, let's read that one from disk: -```r +``` r local_package <- read_package( system.file("extdata", "datapackage.json", package = "frictionless") ) -#> Please make sure you have the right to access data from this Data Package for your intended use. -#> Follow applicable norms or requirements to credit the dataset and its authors. + +local_package +#> A Data Package with 3 resources: +#> • deployments +#> • observations +#> • media +#> Use `unclass()` to print the Data Package as a list. 
+ read_resource(local_package, "media") #> # A tibble: 3 × 5 -#> media_id deployment_id observati…¹ times…² file_…³ -#> -#> 1 aed5fa71-3ed4-4284-a6ba-3550d1a4de8d 1 1-1 2020-0… https:… -#> 2 da81a501-8236-4cbd-aa95-4bc4b10a05df 1 1-1 2020-0… https:… -#> 3 0ba57608-3cf1-49d6-a5a2-fe680851024d 1 1-1 2020-0… https:… -#> # … with abbreviated variable names ¹​observation_id, ²​timestamp, ³​file_path +#> media_id deployment_id observation_id timestamp file_path +#> +#> 1 aed5fa71-3ed4-4284-a6ba-3550… 1 1-1 2020-09-… https://… +#> 2 da81a501-8236-4cbd-aa95-4bc4… 1 1-1 2020-09-… https://… +#> 3 0ba57608-3cf1-49d6-a5a2-fe68… 1 1-1 2020-09-… https://… ``` Data from the `media` was not stored in a CSV file, but directly in the `data` property of that resource in `datapackage.json`. `read_resource()` will automatically detect where to read data from.