From d35f2f185058a1226923511a62aa593889c2d6c6 Mon Sep 17 00:00:00 2001 From: Yihui Xie Date: Tue, 12 Nov 2024 23:42:26 -0600 Subject: [PATCH] move combine_words() and write_bib() to xfun --- DESCRIPTION | 3 +- NEWS.md | 4 + R/citation.R | 172 +---------------------------------- R/output.R | 2 +- R/utils-string.R | 8 -- R/utils.R | 55 +---------- inst/misc/tweak_bib.csv | 44 --------- man/combine_words.Rd | 44 +-------- man/write_bib.Rd | 94 +------------------ tests/testit/test-citation.R | 10 -- tests/testit/test-utils.R | 15 --- 11 files changed, 20 insertions(+), 431 deletions(-) delete mode 100644 inst/misc/tweak_bib.csv delete mode 100644 tests/testit/test-citation.R diff --git a/DESCRIPTION b/DESCRIPTION index 2b4e39eb3a..3a143e24e9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -163,10 +163,10 @@ SystemRequirements: Package vignettes based on R Markdown v2 or reStructuredText Collate: 'block.R' 'cache.R' - 'utils.R' 'citation.R' 'hooks-html.R' 'plot.R' + 'utils.R' 'defaults.R' 'concordance.R' 'engine.R' @@ -199,3 +199,4 @@ Collate: 'utils-vignettes.R' 'zzz.R' RoxygenNote: 7.3.2 +Remotes: yihui/xfun diff --git a/NEWS.md b/NEWS.md index 20641fe94a..ed758ce8f4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # CHANGES IN knitr VERSION 1.50 +## MINOR CHANGES + +- Moved implementations of `combine_words()` and `write_bib()` to the **xfun** package as `xfun::join_words()` and `xfun::pkg_bib()`, respectively, since they are not directly relevant to **knitr**. The functions `combine_words()` and `write_bib()` are still kept in **knitr**, and can continue to be used in the future. + # CHANGES IN knitr VERSION 1.49 ## NEW FEATURES diff --git a/R/citation.R b/R/citation.R index 2128349b5f..a19a1fd129 100644 --- a/R/citation.R +++ b/R/citation.R @@ -1,172 +1,8 @@ #' Generate BibTeX bibliography databases for R packages #' -#' This function uses \code{utils::\link{citation}()} and -#' \code{utils::\link{toBibtex}()} to create bib entries for R packages and -#' write them in a file. It can facilitate the auto-generation of bibliography -#' databases for R packages, and it is easy to regenerate all the citations -#' after updating R packages. -#' -#' For a package, the keyword \samp{R-pkgname} is used for its bib item, where -#' \samp{pkgname} is the name of the package. Citation entries specified in the -#' \file{CITATION} file of the package are also included. The main purpose of -#' this function is to automate the generation of the package citation -#' information because it often changes (e.g. author, year, package version, -#' ...). -#' -#' There are at least two different uses for the URL in a reference list. You -#' might want to tell users where to go for more information; in that case, use -#' the default \code{packageURL = TRUE}, and the first URL listed in the -#' \file{DESCRIPTION} file will be used. Be careful: some authors don't put the -#' most relevant URL first. Alternatively, you might want to identify exactly -#' which version of the package was used in the document. If it was installed -#' from CRAN or some other repositories, the version number identifies it, and -#' \code{packageURL = FALSE} will use the repository URL (as used by -#' \code{utils::\link{citation}()}). -#' -#' @param x Package names. Packages which are not installed are ignored. -#' @param file The (\file{.bib}) file to write. By default, or if \code{NULL}, -#' output is written to the R console. -#' @param tweak Whether to fix some known problems in the citations, especially -#' non-standard format of author names. -#' @param width Width of lines in bibliography entries. If \code{NULL}, lines -#' will not be wrapped. -#' @param prefix Prefix string for keys in BibTeX entries; by default, it is -#' \samp{R-} unless \code{\link{option}('knitr.bib.prefix')} has been set to -#' another string. -#' @param lib.loc A vector of path names of R libraries. -#' @param packageURL Use the \code{URL} field from the \file{DESCRIPTION} file. -#' See Details below. -#' @return A list containing the citations. Citations are also written to the -#' \code{file} as a side effect. -#' @note Some packages on CRAN do not have standard bib entries, which was once -#' reported by Michael Friendly at -#' \url{https://stat.ethz.ch/pipermail/r-devel/2010-November/058977.html}. I -#' find this a real pain, and there are no easy solutions except contacting -#' package authors to modify their DESCRIPTION files. Anyway, the argument -#' \code{tweak} has provided ugly hacks to deal with packages which are known -#' to be non-standard in terms of the format of citations; \code{tweak = TRUE} -#' is by no means intended to hide or modify the original citation -#' information. It is just due to the loose requirements on package authors -#' for the DESCRIPTION file. On one hand, I apologize if it really mangles the -#' information about certain packages; on the other, I strongly recommend -#' package authors to consider the \samp{Authors@@R} field (see the manual -#' \emph{Writing R Extensions}) to make it easier for other people to cite R -#' packages. See \code{knitr:::.tweak.bib} for details of tweaks. Also note -#' this is subject to future changes since R packages are being updated. If -#' you want to contribute more tweaks, please edit the file -#' \file{inst/misc/tweak_bib.csv} in the source package. +#' A wrapper function of \code{xfun::pkg_bib()}. +#' @param ...,prefix Arguments passed to \code{xfun::\link[xfun]{pkg_bib}()}. #' @export -#' @author Yihui Xie and Michael Friendly -#' @examplesIf interactive() -#' write_bib(c('RGtk2', 'gWidgets'), file = 'R-GUI-pkgs.bib') -#' unlink('R-GUI-pkgs.bib') -#' -#' write_bib(c('animation', 'rgl', 'knitr', 'ggplot2')) -#' write_bib(c('base', 'parallel', 'MASS')) # base and parallel are identical -#' write_bib('cluster', prefix = '') # a empty prefix -#' write_bib('digest', prefix = 'R-pkg-') # a new prefix -#' write_bib('digest', tweak = FALSE) # original version -#' -#' # what tweak=TRUE does -#' str(knitr:::.tweak.bib) -write_bib = function( - x = .packages(), file = '', tweak = TRUE, width = NULL, - prefix = getOption('knitr.bib.prefix', 'R-'), lib.loc = NULL, - packageURL = TRUE -) { - system.file = function(...) base::system.file(..., lib.loc = lib.loc) - citation = function(...) utils::citation(..., lib.loc = lib.loc) - x = x[nzchar(x)] # remove possible empty string - idx = mapply(system.file, package = x) == '' - if (any(idx)) { - warning('package(s) ', paste(x[idx], collapse = ', '), ' not found') - x = x[!idx] - } - # no need to write bib for packages in base R other than `base` itself - x = setdiff(x, setdiff(xfun::base_pkgs(), 'base')) - x = sort(x) - bib = sapply(x, function(pkg) { - meta = packageDescription(pkg, lib.loc = lib.loc) - # don't use the citation() URL if the package has provided its own URL - cite = citation(pkg, auto = if (is.null(meta$URL)) meta else { - if (packageURL) meta$Repository = meta$RemoteType = NULL - # use the first URL in case the package provided multiple URLs - meta$URL = sub('[, \t\n].*', '', meta$URL) - meta - }) - if (tweak) { - # e.g. gpairs has "gpairs: " in the title - cite$title = gsub(sprintf('^(%s: )(\\1)', pkg), '\\1', cite$title) - } - entry = toBibtex(cite) - entry[1] = sub('\\{,$', sprintf('{%s%s,', prefix, pkg), entry[1]) - entry - }, simplify = FALSE) - if (tweak) { - for (i in intersect(names(.tweak.bib), x)) { - message('tweaking ', i) - bib[[i]] = merge_list(bib[[i]], .tweak.bib[[i]]) - } - bib = lapply(bib, function(b) { - b['author'] = sub('Duncan Temple Lang', 'Duncan {Temple Lang}', b['author']) - # remove the ugly single quotes required by CRAN policy - b['title'] = gsub("(^|\\W)'([^']+)'(\\W|$)", '\\1\\2\\3', b['title']) - # keep the first URL if multiple are provided - if (!is.na(b['note'])) b['note'] = gsub( - '(^.*?https?://.*?),\\s+https?://.*?(},\\s*)$', '\\1\\2', b['note'] - ) - if (!('year' %in% names(b))) b['year'] = .this.year - b - }) - } - # also read citation entries from the CITATION file if provided - bib2 = lapply(x, function(pkg) { - if (pkg == 'base') return() - if (system.file('CITATION', package = pkg) == '') return() - cites = citation(pkg, auto = FALSE) - cites = Filter(x = cites, function(cite) { - # exclude entries identical to citation(pkg, auto = TRUE) - !isTRUE(grepl('R package version', cite$note)) - }) - s = make_unique(unlist(lapply(cites, function(cite) { - if (is.null(cite$year)) format(Sys.Date(), '%Y') else cite$year - }))) - mapply(cites, s, FUN = function(cite, suffix) { - # the entry is likely to be the same as citation(pkg, auto = TRUE) - if (isTRUE(grepl('R package version', cite$note))) return() - entry = toBibtex(cite) - entry[1] = sub('\\{,$', sprintf('{%s%s,', pkg, suffix), entry[1]) - entry - }, SIMPLIFY = FALSE) - }) - bib = c(bib, unlist(bib2, recursive = FALSE)) - bib = lapply(bib, function(b) { - idx = which(names(b) == '') - if (!is.null(width)) b[-idx] = str_wrap(b[-idx], width, 2, 4) - lines = c(b[idx[1L]], b[-idx], b[idx[2L]], '') - if (tweak) { - # e.g. KernSmooth and spam has & in the title and the journal, respectively - lines = gsub('(? c(1a, 1b, 1c, 2a, 3a, 3b) -make_unique = function(x) { - if (length(x) == 0) return(x) - x2 = make.unique(x) - if (all(i <- x2 == x)) return(x) - x2[i] = paste0(x2[i], '.0') - i = as.numeric(sub('.*[.]([0-9]+)$', '\\1', x2)) + 1 - s = letters[i] - s = ifelse(is.na(s), i, s) - paste0(x, s) -} - #' Encode an image file to a data URI #' #' This function is the same as \code{xfun::\link[xfun]{base64_uri}()} (only with a diff --git a/inst/misc/tweak_bib.csv b/inst/misc/tweak_bib.csv deleted file mode 100644 index a167151df9..0000000000 --- a/inst/misc/tweak_bib.csv +++ /dev/null @@ -1,44 +0,0 @@ -"package","author" -"ade4","Stéphane Dray and Anne-Béatrice Dufour and Jean Thioulouse and Thibaut Jombart and Sandrine Pavoine and Jean R. Lobry and Sébastien Ollier and Aurélie Siberchicot and Daniel Chessel" -"akima","H. Akima and Albrecht Gebhardt and Thomas Petzoldt and Martin Maechler" -"ash","David W. Scott and Albrecht Gebhardt and Stephen Kaluzny" -"bcpa","Jose Claudio Faria and Clarice Garcia Borges Demetrio" -"BiplotGUI","Anthony la Grange and N. J. le Roux and P.J. Rousseeuw and I. Ruts and J. W. Tukey" -"bitops","Steve Dutky and Martin Maechler" -"cacheSweave","Roger D. Peng" -"cat","Ted Harding and Fernando Tusell and Joseph L. Schafer" -"CircStats","Ulric Lund and Claudio Agostinelli" -"contrast","Max Kuhn and Steve Weston and Jed Wing and James Forester" -"date","Terry Therneau and Thomas Lumley and Kjetil Halvorsen and Kurt Hornik" -"digest","Dirk Eddelbuettel" -"ElemStatLearn","Kjetil Halvorsen" -"epiR","Mark Stevenson and Telmo Nunes and Cord Heuer and Jonathon Marshall and Javier Sanchez and Ron Thornton and Jeno Reiczigel and Jim Robison-Cox and Paola Sebastiani and Peter Solymos" -"Fahrmeir","Kjetil Halvorsen" -"flashClust","Fionn Murtagh and {R development team} and Peter Langfelder" -"foreach","{Revolution Analytics} and Steve Weston}" -"fortunes","Achim Zeileis and the R community" -"gee","Vincent J Carey and Thomas Lumley and Brian Ripley" -"gmodels","Gregory R. Warnes andBen Bolker and Thomas Lumley and Randall C Johnson and Randall C. Johnson" -"gWidgets","John Verzani" -"hexbin","Dan Carr and Nicholas Lewin-Koh and Martin Maechler" -"Hmisc","Harrell, Jr., Frank E" -"Hmisc","Frank E. {Harrell, Jr.}" -"leaps","Thomas Lumley" -"mapproj","Doug McIlroy and Ray Brownrigg and Thomas P Minka and Roger Bivand" -"maps","Ray Brownrigg" -"mathgraph","Patrick J. Burns and Nick Efthymiou and Claus Dethlefsen" -"oz","Bill Venables and Kurt Hornik" -"pbivnorm","Alan Genz and Brenton Kenkel" -"pscl","Simon Jackman and Alex Tahk and Achim Zeileis and Christina Maimone and Jim Fearon" -"quadprog","Berwin A. Turlach and Andreas Weingessel" -"R2SWF","Yixuan Qiu and Yihui Xie and Cameron Bracken" -"R2WinBUGS","Andrew Gelman and Sibylle Sturtz and Uwe Ligges and Gregor Gorjanc and Jouni Kerman" -"randomForest","Leo Breiman and Adele Cutler and Andy Liaw and Matthew Wiener" -"rgl","Daniel Adler and Duncan Murdoch" -"RgoogleMaps","Markus Loecher" -"rms","Frank E. {Harrell, Jr.}" -"robustbase","Valentin Todorov and Andreas Ruckstuhl and Matias Salibian-Barrera and Tobias Verbeke and Manuel Koller and Martin Maechler" -"RODBC","Brian Ripley and Michael Lapsley" -"Sleuth2","F. L. Ramsey and D. W. Schafer and Jeannie Sifneos and Berwin A. Turlach" -"sm","Adrian Bowman and Adelchi Azzalini" -"tuneR","Uwe Ligges" diff --git a/man/combine_words.Rd b/man/combine_words.Rd index 0f37339a0a..acc791476c 100644 --- a/man/combine_words.Rd +++ b/man/combine_words.Rd @@ -4,49 +4,11 @@ \alias{combine_words} \title{Combine multiple words into a single string} \usage{ -combine_words( - words, - sep = ", ", - and = " and ", - before = "", - after = before, - oxford_comma = TRUE -) +combine_words(...) } \arguments{ -\item{words}{A character vector.} - -\item{sep}{Separator to be inserted between words.} - -\item{and}{Character string to be prepended to the last word.} - -\item{before, after}{A character string to be added before/after each word.} - -\item{oxford_comma}{Whether to insert the separator between the last two -elements in the list.} -} -\value{ -A character string marked by \code{xfun::\link[xfun]{raw_string}()}. +\item{...}{Arguments passed to \code{xfun::\link[xfun]{join_words}()}.} } \description{ -When a value from an inline R expression is a character vector of multiple -elements, we may want to combine them into a phrase like \samp{a and b}, or -\code{a, b, and c}. That is what this a helper function does. -} -\details{ -If the length of the input \code{words} is smaller than or equal to 1, -\code{words} is returned. When \code{words} is of length 2, the first word -and second word are combined using the \code{and} string, or if blank, -\code{sep} if is used. When the length is greater than 2, \code{sep} is used -to separate all words, and the \code{and} string is prepended to the last -word. -} -\examples{ -combine_words("a") -combine_words(c("a", "b")) -combine_words(c("a", "b", "c")) -combine_words(c("a", "b", "c"), sep = " / ", and = "") -combine_words(c("a", "b", "c"), and = "") -combine_words(c("a", "b", "c"), before = "\"", after = "\"") -combine_words(c("a", "b", "c"), before = "\"", after = "\"", oxford_comma = FALSE) +This is a wrapper function of \code{xfun::join_words()}. } diff --git a/man/write_bib.Rd b/man/write_bib.Rd index 659a7c729e..e77bb7669f 100644 --- a/man/write_bib.Rd +++ b/man/write_bib.Rd @@ -4,99 +4,11 @@ \alias{write_bib} \title{Generate BibTeX bibliography databases for R packages} \usage{ -write_bib( - x = .packages(), - file = "", - tweak = TRUE, - width = NULL, - prefix = getOption("knitr.bib.prefix", "R-"), - lib.loc = NULL, - packageURL = TRUE -) +write_bib(..., prefix = getOption("knitr.bib.prefix", "R-")) } \arguments{ -\item{x}{Package names. Packages which are not installed are ignored.} - -\item{file}{The (\file{.bib}) file to write. By default, or if \code{NULL}, -output is written to the R console.} - -\item{tweak}{Whether to fix some known problems in the citations, especially -non-standard format of author names.} - -\item{width}{Width of lines in bibliography entries. If \code{NULL}, lines -will not be wrapped.} - -\item{prefix}{Prefix string for keys in BibTeX entries; by default, it is -\samp{R-} unless \code{\link{option}('knitr.bib.prefix')} has been set to -another string.} - -\item{lib.loc}{A vector of path names of R libraries.} - -\item{packageURL}{Use the \code{URL} field from the \file{DESCRIPTION} file. -See Details below.} -} -\value{ -A list containing the citations. Citations are also written to the - \code{file} as a side effect. +\item{..., prefix}{Arguments passed to \code{xfun::\link[xfun]{pkg_bib}()}.} } \description{ -This function uses \code{utils::\link{citation}()} and -\code{utils::\link{toBibtex}()} to create bib entries for R packages and -write them in a file. It can facilitate the auto-generation of bibliography -databases for R packages, and it is easy to regenerate all the citations -after updating R packages. -} -\details{ -For a package, the keyword \samp{R-pkgname} is used for its bib item, where -\samp{pkgname} is the name of the package. Citation entries specified in the -\file{CITATION} file of the package are also included. The main purpose of -this function is to automate the generation of the package citation -information because it often changes (e.g. author, year, package version, -...). - -There are at least two different uses for the URL in a reference list. You -might want to tell users where to go for more information; in that case, use -the default \code{packageURL = TRUE}, and the first URL listed in the -\file{DESCRIPTION} file will be used. Be careful: some authors don't put the -most relevant URL first. Alternatively, you might want to identify exactly -which version of the package was used in the document. If it was installed -from CRAN or some other repositories, the version number identifies it, and -\code{packageURL = FALSE} will use the repository URL (as used by -\code{utils::\link{citation}()}). -} -\note{ -Some packages on CRAN do not have standard bib entries, which was once - reported by Michael Friendly at - \url{https://stat.ethz.ch/pipermail/r-devel/2010-November/058977.html}. I - find this a real pain, and there are no easy solutions except contacting - package authors to modify their DESCRIPTION files. Anyway, the argument - \code{tweak} has provided ugly hacks to deal with packages which are known - to be non-standard in terms of the format of citations; \code{tweak = TRUE} - is by no means intended to hide or modify the original citation - information. It is just due to the loose requirements on package authors - for the DESCRIPTION file. On one hand, I apologize if it really mangles the - information about certain packages; on the other, I strongly recommend - package authors to consider the \samp{Authors@R} field (see the manual - \emph{Writing R Extensions}) to make it easier for other people to cite R - packages. See \code{knitr:::.tweak.bib} for details of tweaks. Also note - this is subject to future changes since R packages are being updated. If - you want to contribute more tweaks, please edit the file - \file{inst/misc/tweak_bib.csv} in the source package. -} -\examples{\dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -write_bib(c("RGtk2", "gWidgets"), file = "R-GUI-pkgs.bib") -unlink("R-GUI-pkgs.bib") - -write_bib(c("animation", "rgl", "knitr", "ggplot2")) -write_bib(c("base", "parallel", "MASS")) # base and parallel are identical -write_bib("cluster", prefix = "") # a empty prefix -write_bib("digest", prefix = "R-pkg-") # a new prefix -write_bib("digest", tweak = FALSE) # original version - -# what tweak=TRUE does -str(knitr:::.tweak.bib) -\dontshow{\}) # examplesIf} -} -\author{ -Yihui Xie and Michael Friendly +A wrapper function of \code{xfun::pkg_bib()}. } diff --git a/tests/testit/test-citation.R b/tests/testit/test-citation.R deleted file mode 100644 index 7a44dc1394..0000000000 --- a/tests/testit/test-citation.R +++ /dev/null @@ -1,10 +0,0 @@ -library(testit) - -pkgs = c(rownames(installed.packages(priority = 'high')), 'evaluate', 'knitr') -write_bib(pkgs, tempfile(), tweak = FALSE) - -assert('& is escaped in title when write_bib(tweak = TRUE)', { - (length(grep(' & ', grep( - '^ title =', capture.output(write_bib(pkgs, tweak = TRUE)), value = TRUE - ))) %==% 0L) -}) diff --git a/tests/testit/test-utils.R b/tests/testit/test-utils.R index cd34b15de2..23c44f65d2 100644 --- a/tests/testit/test-utils.R +++ b/tests/testit/test-utils.R @@ -120,21 +120,6 @@ assert('color_def() generates LaTeX code to define a color variable', { (color_def('.5,.6,.7', 'fgcolor') %==% '\\definecolor{fgcolor}{rgb}{.5, .6, .7}') }) -cw = function(...) unclass(combine_words(...)) -assert('combine_words() combines multiple words into a single string', { - (cw(NULL) %==% NULL) - (cw(c('a')) %==% 'a') - (cw(c('a', 'b')) %==% 'a and b') - (cw(c('a', 'b'), and = "") %==% 'a, b') - (cw(c('a', 'b', 'c')) %==% 'a, b, and c') - (cw(c('a', 'b', 'c'), and = '') %==% 'a, b, c') - (cw(c('a', 'b', 'c'), ' / ', '') %==% 'a / b / c') - (cw(c('a', 'b', 'c'), before = '"') %==% '"a", "b", and "c"') - (cw(c('a', 'b', 'c'), before = '``', after = "''") %==% "``a'', ``b'', and ``c''") - (cw(c('a', 'b', 'c'), before = '``', after = "''", oxford_comma = FALSE) %==% "``a'', ``b'' and ``c''") -}) -rm(list = 'cw') - opts = list( fig.cap = 'Figure "caption" <>.', fig.lp = 'Fig:', label = 'foo' )