From cf0133c34b2386ca8a1f40a55bf1c6e52975b853 Mon Sep 17 00:00:00 2001 From: hrbrmstr Date: Thu, 23 Aug 2018 12:39:23 -0400 Subject: [PATCH] gepetto sneak peek --- DESCRIPTION | 6 +- NAMESPACE | 13 ++ R/decapitated-package.R | 4 +- R/gepetto.R | 279 +++++++++++++++++++++++++++++++++++++++ R/util.r | 5 + R/utils-pipe.R | 11 ++ R/utils-safely.R | 90 +++++++++++++ R/zzz.R | 2 + README.Rmd | 14 ++ README.md | 31 +++-- man/gep_active.Rd | 23 ++++ man/gep_debug.Rd | 24 ++++ man/gep_render_har.Rd | 30 +++++ man/gep_render_html.Rd | 27 ++++ man/gep_render_magick.Rd | 27 ++++ man/gep_render_pdf.Rd | 33 +++++ man/gepetto.Rd | 27 ++++ man/pipe.Rd | 12 ++ man/print.gepetto.Rd | 17 +++ 19 files changed, 663 insertions(+), 12 deletions(-) create mode 100644 R/gepetto.R create mode 100644 R/utils-pipe.R create mode 100644 R/utils-safely.R create mode 100644 man/gep_active.Rd create mode 100644 man/gep_debug.Rd create mode 100644 man/gep_render_har.Rd create mode 100644 man/gep_render_html.Rd create mode 100644 man/gep_render_magick.Rd create mode 100644 man/gep_render_pdf.Rd create mode 100644 man/gepetto.Rd create mode 100644 man/pipe.Rd create mode 100644 man/print.gepetto.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 00268eb..04a08d7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,9 +23,13 @@ Suggests: Depends: R (>= 3.2.0) Imports: + httr, + jsonlite, + HARtools, xml2, magick, processx, tools, - utils + utils, + magrittr RoxygenNote: 6.0.1.9000 diff --git a/NAMESPACE b/NAMESPACE index 1ae3f1e..0abb529 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,14 +1,27 @@ # Generated by roxygen2: do not edit by hand +S3method(print,gepetto) +export("%>%") export(chrome_dump_pdf) export(chrome_read_html) export(chrome_shot) export(chrome_version) export(download_chromium) +export(gep_active) +export(gep_debug) +export(gep_render_har) +export(gep_render_html) +export(gep_render_magick) +export(gep_render_pdf) +export(gepetto) export(get_chrome_env) export(set_chrome_env) +import(httr) import(magick) import(processx) import(tools) import(utils) import(xml2) +importFrom(HARtools,readHAR) +importFrom(jsonlite,fromJSON) +importFrom(magrittr,"%>%") diff --git a/R/decapitated-package.R b/R/decapitated-package.R index 402e3a7..f23e53d 100644 --- a/R/decapitated-package.R +++ b/R/decapitated-package.R @@ -44,5 +44,7 @@ #' @name decapitated #' @docType package #' @author Bob Rudis (bob@@rud.is) -#' @import xml2 magick processx tools utils +#' @import xml2 magick processx tools utils httr +#' @importFrom jsonlite fromJSON +#' @importFrom HARtools readHAR NULL \ No newline at end of file diff --git a/R/gepetto.R b/R/gepetto.R new file mode 100644 index 0000000..ce1b41c --- /dev/null +++ b/R/gepetto.R @@ -0,0 +1,279 @@ +#' Create a connection to a Gepetto API server +#' +#' @md +#' @param host where is it running? Defaults to "`localhost`" +#' @param port same, but what port? Defaults to `8080` since the most common +#' use case is that you have `gepetto` running in a Docker container. +#' Use `3000` if you're running it locally via `npm` as that's the default +#' port for development.` +#' @return A `gepetto` connection object +#' @export +#' @examples \dontrun{ +#' gepetto() +#' } +gepetto <- function(host = "localhost", port = 8080) { + + list( + host = host, + port = port + ) -> out + + class(out) <- c("gepetto") + + out + +} + +#' Print +#' @md +#' @param x `gepetto` object +#' @param ... unused +#' @keywords internal +#' @export +print.gepetto <- function(x, ...) { + cat("\n", sep="") +} + +#' Render a page in a javascript context and serialize to HTML +#' +#' @md +#' @param gep a gepetto connection object +#' @param url the URL to fetch and render +#' @param width,height viewport width/height +#' @return HTML +#' @export +#' @examples \dontrun{ +#' gepetto(port=3000) %>% +#' gep_render_html("https://r-project.org/") +#' } +gep_render_html <- function(gep, url, width=1440, height=5000) { + + httr::GET( + url = sprintf("http://%s:%s/render_html", gep$host, gep$port), + query = list( + url = url, + width = width, + height = height + ) + + ) -> res + + httr::stop_for_status(res) + + out <- httr::content(res, as="text") + out <- xml2::read_html(out) + + out + +} + +#' Render a page in a javascript context and serialize to HAR +#' +#' TODO: Modify the `puppeteer-har` node module to allow for saving content +#' +#' @md +#' @param gep a gepetto connection object +#' @param url the URL to fetch and render +#' @param width,height viewport width/height +#' @return HAR +#' @note content is not returned, just HAR information +#' @export +#' @examples \dontrun{ +#' gepetto(port=3000) %>% +#' gep_render_har("https://r-project.org/") +#' } +gep_render_har <- function(gep, url, width=1440, height=5000) { + + httr::GET( + url = sprintf("http://%s:%s/render_har", gep$host, gep$port), + query = list( + url = url, + width = width, + height = height + ) + ) -> res + + httr::stop_for_status(res) + + out <- httr::content(res, as="text") + out <- HARtools::readHAR(out) + + out + +} + +#' Render a page in a javascript context and take a screenshot +#' +#' @md +#' @param gep a gepetto connection object +#' @param url the URL to fetch and render +#' @param width,height viewport width/height +#' @return `magick` image +#' @export +#' @examples \dontrun{ +#' gepetto(port=3000) %>% +#' gep_render_magick("https://r-project.org/") +#' } +gep_render_magick <- function(gep, url, width=1440, height=5000) { + httr::GET( + url = sprintf("http://%s:%s/render_png", gep$host, gep$por), + query = list( + url = url, + width = width, + height = height + ) + ) -> res + httr::stop_for_status(res) + out <- httr::content(res) + out <- magick::image_read(out) + out +} + +# #' Take a screenshot of the current browser page +# #' +# #' @md +# #' @param gep a gepetto connection object +# #' @return `magick` image +# #' @export +# #' @examples \dontrun{ +# #' gepetto(port=3000) %>% +# #' gep_screenshot() +# #' } +# gep_screenshot <- function(gep) { +# +# httr::GET( +# url = sprintf("http://%s:%s/screenshot", gep$host, gep$por), +# ) -> res +# +# httr::stop_for_status(res) +# +# out <- httr::content(res) +# out <- magick::image_read(out) +# out +# +# } + +#' Render a page in a javascript context and rendero to PDF +#' +#' @md +#' @param gep a gepetto connection object +#' @param url the URL to fetch and render +#' @param path directory & filename to save the PDF to. If `NULL` will be saved +#' to a tempfile and it location will be returned. +#' @param overwrite if `TRUE` any existing `path` (file) will be overwritten +#' @param width,height viewport width/height +#' @return object +#' @export +#' @examples \dontrun{ +#' gepetto(port=3000) %>% +#' gep_render_pdf("https://r-project.org/") +#' } +gep_render_pdf <- function(gep, url, path=NULL, overwrite=TRUE, width=1440, height=5000) { + + if (is.null(path)) { + path <- tempfile(fileext = ".pdf") + } else { + path <- path.expand(path) + } + + httr::GET( + url = sprintf("http://%s:%s/render_pdf", gep$host, gep$por), + query = list( + url = url, + width = width, + height = height + ), + httr::write_disk(path = path) + ) -> res + + httr::stop_for_status(res) + + path + +} + +#' Get "debug-level" information of a running gepetto server +#' +#' @md +#' @param gep a gepetto connection object +#' @return debug info +#' @export +#' @examples \dontrun{ +#' gepetto() %>% +#' gep_debug() %>% +#' str() +#' } +gep_debug <- function(gep) { + + httr::GET( + url = sprintf("http://%s:%s/_debug", gep$host, gep$port) + ) -> res + + httr::stop_for_status(res) + + out <- httr::content(res, as="text") + out <- jsonlite::fromJSON(out) + + out + +} + +#' Get test whether the gepetto server is active +#' +#' @md +#' @param gep a gepetto connection object +#' @return logical (`TRUE` if alive) +#' @export +#' @examples \dontrun{ +#' gepetto() %>% +#' gep_active() +#' } +gep_active <- function(gep) { + + s_GET( + url = sprintf("http://%s:%s/_ping", gep$host, gep$port) + ) -> res + + res <- stop_for_problem(res) + + httr::stop_for_status(res) + + out <- httr::content(res, as="text") + out <- jsonlite::fromJSON(out) + + out$status == "ok" + +} + + +#' #' Execute Puppeteer commands +#' #' +#' #' This is a **low-level** call that makes **you** responsible for the return +#' #' type. Eventually there will likely be more boilerplate code for handling return +#' #' values. +#' #' +#' #' @md +#' #' @param gep a gepetto connection object +#' #' @param js Puppeteer js to execute in-browser +#' #' @references [Puppeteer API](https://github.com/GoogleChrome/puppeteer/blob/v1.7.0/docs/api.md) +#' #' @export +#' #' @examples \dontrun{ +#' #' gepetto() %>% +#' #' gep_exec() +#' #' } +#' gep_exec <- function(gep, js) { +#' +#' httr::POST( +#' url = sprintf("http://%s:%s/exec", gep$host, gep$port), +#' encode = "form", +#' body = js +#' ) -> res +#' +#' httr::stop_for_status(res) +#' +#' out <- httr::content(res, as="text") +#' # out <- jsonlite::fromJSON(out) +#' # +#' out +#' +#' } \ No newline at end of file diff --git a/R/util.r b/R/util.r index f5e9426..3ff43f9 100644 --- a/R/util.r +++ b/R/util.r @@ -1,3 +1,8 @@ + +stop_for_problem <- function(res) { + if (is.null(res$result)) stop(res$error$message, call.=FALSE) else res$result +} + .get_app_dir <- function() { ddir <- file.path(Sys.getenv("HOME"), ".rdecapdata") if (!dir.exists(ddir)) { diff --git a/R/utils-pipe.R b/R/utils-pipe.R new file mode 100644 index 0000000..fb8c818 --- /dev/null +++ b/R/utils-pipe.R @@ -0,0 +1,11 @@ +#' Pipe operator +#' +#' See \code{magrittr::\link[magrittr]{\%>\%}} for details. +#' +#' @name %>% +#' @rdname pipe +#' @keywords internal +#' @export +#' @importFrom magrittr %>% +#' @usage lhs \%>\% rhs +NULL diff --git a/R/utils-safely.R b/R/utils-safely.R new file mode 100644 index 0000000..8e7f90d --- /dev/null +++ b/R/utils-safely.R @@ -0,0 +1,90 @@ +# Less cool counterparts to purrr's side-effect capture-rs +# +# Most of the helper functions are 100% from output.R in purrr repo +# +# @param quiet Hide errors (`TRUE`, the default), or display them +# as they occur? +# @param otherwise Default value to use when an error occurs. +# +# @return `safely`: wrapped function instead returns a list with +# components `result` and `error`. One value is always `NULL`. +# +# `quietly`: wrapped function instead returns a list with components +# `result`, `output`, `messages` and `warnings`. +# +# `possibly`: wrapped function uses a default value (`otherwise`) +# whenever an error occurs. +safely <- function(.f, otherwise = NULL, quiet = TRUE) { + function(...) capture_error(.f(...), otherwise, quiet) +} + +quietly <- function(.f) { + function(...) capture_output(.f(...)) +} + +possibly <- function(.f, otherwise, quiet = TRUE) { + force(otherwise) + function(...) { + tryCatch(.f(...), + error = function(e) { + if (!quiet) + message("Error: ", e$message) + otherwise + }, + interrupt = function(e) { + stop("Terminated by user", call. = FALSE) + } + ) + } +} + +capture_error <- function(code, otherwise = NULL, quiet = TRUE) { + tryCatch( + list(result = code, error = NULL), + error = function(e) { + if (!quiet) + message("Error: ", e$message) + + list(result = otherwise, error = e) + }, + interrupt = function(e) { + stop("Terminated by user", call. = FALSE) + } + ) +} + +capture_output <- function(code) { + warnings <- character() + wHandler <- function(w) { + warnings <<- c(warnings, w$message) + invokeRestart("muffleWarning") + } + + messages <- character() + mHandler <- function(m) { + messages <<- c(messages, m$message) + invokeRestart("muffleMessage") + } + + temp <- file() + sink(temp) + on.exit({ + sink() + close(temp) + }) + + result <- withCallingHandlers( + code, + warning = wHandler, + message = mHandler + ) + + output <- paste0(readLines(temp, warn = FALSE), collapse = "\n") + + list( + result = result, + output = output, + warnings = warnings, + messages = messages + ) +} diff --git a/R/zzz.R b/R/zzz.R index 06e9d3c..5a3d1c1 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,3 +1,5 @@ +s_GET <- safely(httr::GET) + .onAttach <- function(libname, pkgname) { HEADLESS_CHROME <- Sys.getenv("HEADLESS_CHROME") diff --git a/README.Rmd b/README.Rmd index 98c5434..ebfffcc 100644 --- a/README.Rmd +++ b/README.Rmd @@ -62,6 +62,8 @@ Chrome. The following functions are implemented: +### CLI-based ops + - `downlaod_chromium`: Download a standalone version of Chromium (recommended) - `chrome_dump_pdf`: "Print" to PDF - `chrome_read_html`: Read a URL via headless Chrome and return the raw or rendered '' 'innerHTML' DOM elements @@ -70,6 +72,18 @@ The following functions are implemented: - `get_chrome_env`: get an envrionment variable 'HEADLESS_CHROME' - `set_chrome_env`: set an envrionment variable 'HEADLESS_CHROME' +### `gepetto`-based ops + +- `gepetto`: Create a connection to a Gepetto API server +- `gep_active`: Get test whether the gepetto server is active +- `gep_debug`: Get "debug-level" information of a running gepetto server +- `gep_render_har`: Render a page in a javascript context and serialize to HAR +- `gep_render_html`: Render a page in a javascript context and serialize to HTML +- `gep_render_magick`: Render a page in a javascript context and take a screenshot +- `gep_render_pdf`: Render a page in a javascript context and rendero to PDF + +More information on `gepetto` is forthcoming but you can take a sneak peek [here](https://gitlab.com/hrbrmstr/gepetto). + ## Installation ```{r eval=FALSE} diff --git a/README.md b/README.md index 4c84b90..e33ff04 100644 --- a/README.md +++ b/README.md @@ -63,16 +63,27 @@ control over the command-line execution of headless Chrome. The following functions are implemented: - - `downlaod_chromium`: Download a standalone version of Chromium (recommended) - - `chrome_dump_pdf`: “Print” to PDF - - `chrome_read_html`: Read a URL via headless Chrome and return the - raw or rendered ’ - - ‘’innerHTML’ DOM elements - - `chrome_shot`: Capture a screenshot - - `chrome_version`: Get Chrome version - - `get_chrome_env`: get an envrionment variable ‘HEADLESS\_CHROME’ - - `set_chrome_env`: set an envrionment variable ‘HEADLESS\_CHROME’ +### CLI-based ops + +- `downlaod_chromium`: Download a standalone version of Chromium (recommended) +- `chrome_dump_pdf`: "Print" to PDF +- `chrome_read_html`: Read a URL via headless Chrome and return the raw or rendered '' 'innerHTML' DOM elements +- `chrome_shot`: Capture a screenshot +- `chrome_version`: Get Chrome version +- `get_chrome_env`: get an envrionment variable 'HEADLESS_CHROME' +- `set_chrome_env`: set an envrionment variable 'HEADLESS_CHROME' + +### `gepetto`-based ops + +- `gepetto`: Create a connection to a Gepetto API server +- `gep_active`: Get test whether the gepetto server is active +- `gep_debug`: Get "debug-level" information of a running gepetto server +- `gep_render_har`: Render a page in a javascript context and serialize to HAR +- `gep_render_html`: Render a page in a javascript context and serialize to HTML +- `gep_render_magick`: Render a page in a javascript context and take a screenshot +- `gep_render_pdf`: Render a page in a javascript context and rendero to PDF + +More information on `gepetto` is forthcoming but you can take a sneak peek [here](https://gitlab.com/hrbrmstr/gepetto). ## Installation diff --git a/man/gep_active.Rd b/man/gep_active.Rd new file mode 100644 index 0000000..5f9cc0f --- /dev/null +++ b/man/gep_active.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gepetto.R +\name{gep_active} +\alias{gep_active} +\title{Get test whether the gepetto server is active} +\usage{ +gep_active(gep) +} +\arguments{ +\item{gep}{a gepetto connection object} +} +\value{ +logical (\code{TRUE} if alive) +} +\description{ +Get test whether the gepetto server is active +} +\examples{ +\dontrun{ +gepetto() \%>\% + gep_active() +} +} diff --git a/man/gep_debug.Rd b/man/gep_debug.Rd new file mode 100644 index 0000000..1580d2a --- /dev/null +++ b/man/gep_debug.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gepetto.R +\name{gep_debug} +\alias{gep_debug} +\title{Get "debug-level" information of a running gepetto server} +\usage{ +gep_debug(gep) +} +\arguments{ +\item{gep}{a gepetto connection object} +} +\value{ +debug info +} +\description{ +Get "debug-level" information of a running gepetto server +} +\examples{ +\dontrun{ +gepetto() \%>\% + gep_debug() \%>\% + str() +} +} diff --git a/man/gep_render_har.Rd b/man/gep_render_har.Rd new file mode 100644 index 0000000..c41e6ff --- /dev/null +++ b/man/gep_render_har.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gepetto.R +\name{gep_render_har} +\alias{gep_render_har} +\title{Render a page in a javascript context and serialize to HAR} +\usage{ +gep_render_har(gep, url, width = 1440, height = 5000) +} +\arguments{ +\item{gep}{a gepetto connection object} + +\item{url}{the URL to fetch and render} + +\item{width, height}{viewport width/height} +} +\value{ +HAR +} +\description{ +TODO: Modify the \code{puppeteer-har} node module to allow for saving content +} +\note{ +content is not returned, just HAR information +} +\examples{ +\dontrun{ +gepetto(port=3000) \%>\% + gep_render_har("https://r-project.org/") +} +} diff --git a/man/gep_render_html.Rd b/man/gep_render_html.Rd new file mode 100644 index 0000000..39c627b --- /dev/null +++ b/man/gep_render_html.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gepetto.R +\name{gep_render_html} +\alias{gep_render_html} +\title{Render a page in a javascript context and serialize to HTML} +\usage{ +gep_render_html(gep, url, width = 1440, height = 5000) +} +\arguments{ +\item{gep}{a gepetto connection object} + +\item{url}{the URL to fetch and render} + +\item{width, height}{viewport width/height} +} +\value{ +HTML +} +\description{ +Render a page in a javascript context and serialize to HTML +} +\examples{ +\dontrun{ +gepetto(port=3000) \%>\% + gep_render_html("https://r-project.org/") +} +} diff --git a/man/gep_render_magick.Rd b/man/gep_render_magick.Rd new file mode 100644 index 0000000..19a42ba --- /dev/null +++ b/man/gep_render_magick.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gepetto.R +\name{gep_render_magick} +\alias{gep_render_magick} +\title{Render a page in a javascript context and take a screenshot} +\usage{ +gep_render_magick(gep, url, width = 1440, height = 5000) +} +\arguments{ +\item{gep}{a gepetto connection object} + +\item{url}{the URL to fetch and render} + +\item{width, height}{viewport width/height} +} +\value{ +\code{magick} image +} +\description{ +Render a page in a javascript context and take a screenshot +} +\examples{ +\dontrun{ +gepetto(port=3000) \%>\% + gep_render_magick("https://r-project.org/") +} +} diff --git a/man/gep_render_pdf.Rd b/man/gep_render_pdf.Rd new file mode 100644 index 0000000..8ccaa9f --- /dev/null +++ b/man/gep_render_pdf.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gepetto.R +\name{gep_render_pdf} +\alias{gep_render_pdf} +\title{Render a page in a javascript context and rendero to PDF} +\usage{ +gep_render_pdf(gep, url, path = NULL, overwrite = TRUE, width = 1440, + height = 5000) +} +\arguments{ +\item{gep}{a gepetto connection object} + +\item{url}{the URL to fetch and render} + +\item{path}{directory & filename to save the PDF to. If \code{NULL} will be saved +to a tempfile and it location will be returned.} + +\item{overwrite}{if \code{TRUE} any existing \code{path} (file) will be overwritten} + +\item{width, height}{viewport width/height} +} +\value{ +object +} +\description{ +Render a page in a javascript context and rendero to PDF +} +\examples{ +\dontrun{ +gepetto(port=3000) \%>\% + gep_render_pdf("https://r-project.org/") +} +} diff --git a/man/gepetto.Rd b/man/gepetto.Rd new file mode 100644 index 0000000..6fca5b5 --- /dev/null +++ b/man/gepetto.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gepetto.R +\name{gepetto} +\alias{gepetto} +\title{Create a connection to a Gepetto API server} +\usage{ +gepetto(host = "localhost", port = 8080) +} +\arguments{ +\item{host}{where is it running? Defaults to "\code{localhost}"} + +\item{port}{same, but what port? Defaults to \code{8080} since the most common +use case is that you have \code{gepetto} running in a Docker container. +Use \code{3000} if you're running it locally via \code{npm} as that's the default +port for development.`} +} +\value{ +A \code{gepetto} connection object +} +\description{ +Create a connection to a Gepetto API server +} +\examples{ +\dontrun{ +gepetto() +} +} diff --git a/man/pipe.Rd b/man/pipe.Rd new file mode 100644 index 0000000..b7daf6a --- /dev/null +++ b/man/pipe.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-pipe.R +\name{\%>\%} +\alias{\%>\%} +\title{Pipe operator} +\usage{ +lhs \%>\% rhs +} +\description{ +See \code{magrittr::\link[magrittr]{\%>\%}} for details. +} +\keyword{internal} diff --git a/man/print.gepetto.Rd b/man/print.gepetto.Rd new file mode 100644 index 0000000..eb7f807 --- /dev/null +++ b/man/print.gepetto.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gepetto.R +\name{print.gepetto} +\alias{print.gepetto} +\title{Print} +\usage{ +\method{print}{gepetto}(x, ...) +} +\arguments{ +\item{x}{\code{gepetto} object} + +\item{...}{unused} +} +\description{ +Print +} +\keyword{internal}