gesistsa · chainsawriot · Feb 9, 2024 · Feb 8, 2024 · Feb 8, 2024 · Feb 9, 2024
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -29,8 +29,8 @@
 ^CRAN-RELEASE$
 ^cran-comments\.md$
 ^data/abstracts_topicmodels\.rda$
-^data/abstracts_seededlda\.rda$
 ^data/abstracts_unseededlda\.rda$
+^data/abstracts_keyatm\.rda$
 ^data/abstracts_warplda\.rda$
 ^data/abstracts_stm\.rda$
 ^tests/testthat/apps/

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: oolong
 Title: Create Validation Tests for Automated Content Analysis
-Version: 0.5.1
+Version: 0.6.0
 Authors@R: 
     c(person(given = "Chung-hong", family = "Chan", role = c("aut", "cre"), email = "[email protected]", comment = c(ORCID = "0000-0002-6232-7530")),
     person(given = "Marius", family = "Sältzer", role = c("aut"), email = "[email protected]", comment = c(ORCID = "0000-0002-8604-4666")))
@@ -10,9 +10,9 @@ Encoding: UTF-8
 URL: https://gesistsa.github.io/oolong, https://github.com/gesistsa/oolong
 LazyData: true
 Depends:
-    R (>= 4.0)
-Imports: 
-    keyATM (>= 0.2.2),
+    R (>= 3.5.0)
+Imports:
+    seededlda,
     purrr,
     tibble,
     shiny,
@@ -26,14 +26,14 @@ Imports:
     stats,
     utils
 RoxygenNote: 7.3.1
-Suggests: 
+Suggests:
+    keyATM (>= 0.2.2),
     testthat (>= 3.0.2),
     text2vec (>= 0.6),
     BTM,
     dplyr,
     topicmodels,
     stm,
-    seededlda,
     covr,
     stringr,
     knitr,

diff --git a/R/oolong.R b/R/oolong.R
@@ -71,21 +71,23 @@ Oolong_test <- R6::R6Class(
 #' @return an oolong test object.
 #' @examples
 #' ## Creation of oolong test with only word intrusion test
-#' data(abstracts_keyatm)
+#' data(abstracts_seededlda)
 #' data(abstracts)
-#' oolong_test <- wi(input_model = abstracts_keyatm, userid = "Hadley")
+#' oolong_test <- wi(input_model = abstracts_seededlda, userid = "Hadley")
 #' ## Creation of oolong test with both word intrusion test and topic intrusion test
-#' oolong_test <- witi(input_model = abstracts_keyatm, input_corpus = abstracts$text, userid = "Julia")
+#' oolong_test <- witi(input_model = abstracts_seededlda,
+#' input_corpus = abstracts$text, userid = "Julia")
 #' ## Creation of oolong test with topic intrusion test
-#' oolong_test <- ti(input_model = abstracts_keyatm, input_corpus = abstracts$text, userid = "Jenny")
+#' oolong_test <- ti(input_model = abstracts_seededlda,
+#' input_corpus = abstracts$text, userid = "Jenny")
 #' ## Creation of oolong test with word set intrusion test
-#' oolong_test <- wsi(input_model = abstracts_keyatm, userid = "Garrett")
+#' oolong_test <- wsi(input_model = abstracts_seededlda, userid = "Garrett")
 #' ## Creation of gold standard
 #' oolong_test <- gs(input_corpus = trump2k, userid = "Yihui")
 #' ## Using create_oolong(); not recommended
-#' oolong_test <- create_oolong(input_model = abstracts_keyatm,
+#' oolong_test <- create_oolong(input_model = abstracts_seededlda,
 #' input_corpus = abstracts$text, userid = "JJ")
-#' oolong_test <- create_oolong(input_model = abstracts_keyatm,
+#' oolong_test <- create_oolong(input_model = abstracts_seededlda,
 #' input_corpus = abstracts$text, userid = "Mara", type = "ti")
 #' oolong_test <- create_oolong(input_corpus = abstracts$text, userid = "Winston", type = "gs")
 #' @author Chung-hong Chan, Marius Sältzer

diff --git a/R/oolong_btm.R b/R/oolong_btm.R
@@ -4,7 +4,7 @@
     K <- input_model$K
     better_theta <- dirty_theta[match(quanteda::docid(input_corpus), row.names(dirty_theta)),]
     ## replace NA value with ambiguous theta, i.e. 1/K
-    better_theta[is.na(better_theta)] <- 1/K
+    better_theta[is.na(better_theta)] <- 1 / K
     rownames(better_theta) <- quanteda::docid(input_corpus)
     return(better_theta)
 }
@@ -13,19 +13,19 @@
     input_model <- input_model_s3$model
     K <- input_model$K
     V <- input_model$W
-    terms <- t(apply(input_model$phi, MARGIN = 2, FUN = function(x){
+    terms <- t(apply(input_model$phi, MARGIN = 2, FUN = function(x) {
         x <- data.frame(token = names(x), probability = x)
         x <- x[order(x$probability, decreasing = TRUE), ]
         x <- x$token
         head(x, n = length(x))
     }))
     all_terms <- unique(as.vector(terms[,seq_len(n_top_terms)]))
     if (need_topic) {
-        if (is.null(input_corpus) | is.null(btm_dataframe) | !"corpus" %in% class(input_corpus)) {
+        if (is.null(input_corpus) || is.null(btm_dataframe) || !"corpus" %in% class(input_corpus)) {
             .cstop(TRUE, "You need to provide input_corpus (in quanteda format) and btm_dataframe for generating topic intrusion tests.")
         }
         model_terms <- terms[, seq_len(n_topiclabel_words)]
-        theta <- .generate_btm_theta(input_model, btm_dataframe, input_corpus)        
+        theta <- .generate_btm_theta(input_model, btm_dataframe, input_corpus)
     } else {
         model_terms <- NULL
         theta <- NULL

diff --git a/R/oolong_data_misc.R b/R/oolong_data_misc.R
@@ -15,9 +15,9 @@
 #' Topic models trained with the abstracts dataset.
 #'
 #' These are topic models trained with different topic model packages.
-"abstracts_keyatm"
+"abstracts_seededlda"
 
-#' @rdname abstracts_keyatm
+#' @rdname abstracts_seededlda
 "abstracts_btm"
 
 #' AFINN dictionary
@@ -44,7 +44,7 @@
 #' @importFrom quanteda print corpus
 NULL
 
-utils::globalVariables(c('cookd', 'diffxy', 'index', 'meanxy', 'word_length', 'avg_answer', 'abstracts_keyatm', 'abstracts'))
+utils::globalVariables(c('cookd', 'diffxy', 'index', 'meanxy', 'word_length', 'avg_answer', 'abstracts_seededlda', 'abstracts'))
 
 ### print the ... if boolean_test is true
 .cp <- function(boolean_test, ...) {
@@ -89,4 +89,3 @@ utils::globalVariables(c('cookd', 'diffxy', 'index', 'meanxy', 'word_length', 'a
     }
     return(digest::digest(x, algo = "sha1"))
 }
-
diff --git a/R/oolong_update.R b/R/oolong_update.R
@@ -42,7 +42,7 @@ update_oolong <- function(oolong, verbose = TRUE) {
     .cstop(!.check_oolong(oolong$.__enclos_env__$private), "This oolong object does not need to be updated.")
     if ("oolong_test_tm" %in% class(oolong)) {
         ## generate a dummy oolong object
-        new_oolong <- create_oolong(abstracts_keyatm)
+        new_oolong <- create_oolong(abstracts_seededlda)
         new_oolong$.__enclos_env__$private$finalized <- oolong$.__enclos_env__$private$finalized
         new_oolong$.__enclos_env__$private$test_content <- oolong$.__enclos_env__$private$test_content
         ## renaming test_content
@@ -65,7 +65,7 @@ update_oolong <- function(oolong, verbose = TRUE) {
         new_oolong$.__enclos_env__$private$construct <- oolong$.__enclos_env__$private$construct
         new_oolong$.__enclos_env__$private$hash <- .safe_hash(new_oolong$.__enclos_env__$private$test_content)
         new_oolong$.__enclos_env__$private$hash_input_corpus <- oolong$.__enclos_env__$private$hash_input_corpus
-        new_oolong$.__enclos_env__$private$meta <- .generate_meta()      
+        new_oolong$.__enclos_env__$private$meta <- .generate_meta()
     }
     if (is.null(new_oolong$userid)) {
         new_oolong$userid <- NA

diff --git a/man/abstracts_keyatm.Rd → man/abstracts_seededlda.Rd b/man/abstracts_keyatm.Rd → man/abstracts_seededlda.Rd
diff --git a/man/create_oolong.Rd b/man/create_oolong.Rd
diff --git a/tests/testthat.R b/tests/testthat.R
@@ -1,4 +1,4 @@
 library(testthat)
 library(oolong)
 
-test_check("oolong", report = "minimal")
+test_check("oolong")
diff --git a/tests/testthat/_snaps/printing.md b/tests/testthat/_snaps/printing.md
@@ -72,7 +72,7 @@
 # ti only
 
     Code
-      create_oolong(input_model = abstracts_keyatm, input_corpus = abstracts$text,
+      create_oolong(input_model = abstracts_seededlda, input_corpus = abstracts$text,
       type = "ti")
     Message
 
@@ -88,7 +88,7 @@
 # wsi only
 
     Code
-      create_oolong(input_model = abstracts_keyatm, input_corpus = abstracts$text,
+      create_oolong(input_model = abstracts_seededlda, input_corpus = abstracts$text,
       type = "wsi", wsi_n_top_terms = 100)
     Message
 

diff --git a/tests/testthat/apps/ti/app.R b/tests/testthat/apps/ti/app.R
@@ -1,5 +1,5 @@
 library(oolong)
 
-x <- ti(abstracts_keyatm, abstracts$text, exact_n = 10)
+x <- ti(abstracts_seededlda, abstracts$text, exact_n = 10)
 
 x$do_topic_intrusion_test()
diff --git a/tests/testthat/apps/wi/app.R b/tests/testthat/apps/wi/app.R
@@ -1,5 +1,5 @@
 library(oolong)
 
-x <- wi(abstracts_keyatm)
+x <- wi(abstracts_seededlda)
 
 x$do_word_intrusion_test()
diff --git a/tests/testthat/apps/wsi/app.R b/tests/testthat/apps/wsi/app.R
@@ -1,5 +1,5 @@
 require(oolong)
 
-x <- wsi(abstracts_keyatm)
+x <- wsi(abstracts_seededlda)
 
 x$do_word_set_intrusion_test()
diff --git a/tests/testthat/test-defensive_programming.R b/tests/testthat/test-defensive_programming.R
@@ -19,10 +19,10 @@ test_that("precondiction", {
 
 test_that("locking", {
     ## premature locking
-    x <- create_oolong(abstracts_keyatm)
+    x <- create_oolong(abstracts_seededlda)
     expect_error(x$lock())
     expect_error(x$lock(force = TRUE), NA)
-    x <- create_oolong(abstracts_keyatm, abstracts$text)
+    x <- create_oolong(abstracts_seededlda, abstracts$text)
     expect_error(x$lock())
     ## error when only word intrusion test is done.
     x <- genius_word(x)
@@ -32,16 +32,16 @@ test_that("locking", {
 })
 
 test_that("cloning", {
-    x <- create_oolong(abstracts_keyatm)
+    x <- create_oolong(abstracts_seededlda)
     expect_error(clone_oolong(x), NA)
     x <- genius_word(x)
     ## Cannot clone a partially coded object.
     expect_error(clone_oolong(x))
-    x <- create_oolong(abstracts_keyatm)
+    x <- create_oolong(abstracts_seededlda)
     x$lock(force = TRUE)
     expect_error(clone_oolong(x))
     ## Cloned object is not sharing the same private space.
-    x <- create_oolong(abstracts_keyatm)
+    x <- create_oolong(abstracts_seededlda)
     y <- clone_oolong(x)
     x$lock(force = TRUE)
     expect_true(x$.__enclos_env__$private$finalized)
@@ -50,88 +50,88 @@ test_that("cloning", {
 
 test_that("cloning all types", {
     ## pure wi
-    x <- wi(abstracts_keyatm)
+    x <- wi(abstracts_seededlda)
     expect_error(clone_oolong(x), NA)
     x$.__enclos_env__$private$test_content$wi$answer[1] <- "x"
     expect_error(clone_oolong(x))
     ## pure ti
-    x <- ti(abstracts_keyatm, abstracts$text)
+    x <- ti(abstracts_seededlda, abstracts$text)
     expect_error(clone_oolong(x), NA)
     x$.__enclos_env__$private$test_content$ti$answer[1] <- "x"
     expect_error(clone_oolong(x))
     ## witi
-    x <- witi(abstracts_keyatm, abstracts$text)
+    x <- witi(abstracts_seededlda, abstracts$text)
     expect_error(clone_oolong(x), NA)
     x$.__enclos_env__$private$test_content$ti$answer[1] <- "x"
     expect_error(clone_oolong(x))
-    x <- witi(abstracts_keyatm, abstracts$text)
+    x <- witi(abstracts_seededlda, abstracts$text)
     expect_error(clone_oolong(x), NA)
     x$.__enclos_env__$private$test_content$wi$answer[1] <- "x"
     expect_error(clone_oolong(x))
-    x <- witi(abstracts_keyatm, abstracts$text)
+    x <- witi(abstracts_seededlda, abstracts$text)
     expect_error(clone_oolong(x), NA)
     x$.__enclos_env__$private$test_content$wi$answer[1] <- "x"
     x$.__enclos_env__$private$test_content$ti$answer[1] <- "x"
     expect_error(clone_oolong(x))
     ## wsi
-    x <- wsi(abstracts_keyatm)
+    x <- wsi(abstracts_seededlda)
     expect_error(clone_oolong(x), NA)
     x$.__enclos_env__$private$test_content$wsi$answer[1] <- "x"
     expect_error(clone_oolong(x))
     ## gs
     x <- gs(abstracts$text)
     expect_error(clone_oolong(x), NA)
     x$.__enclos_env__$private$test_content$gs$answer[1] <- 1
-    expect_error(clone_oolong(x))    
+    expect_error(clone_oolong(x))
 })
 
 test_that("Can't launch $do_topic_intrusion_test() when no test content", {
-    x <- create_oolong(abstracts_keyatm)
+    x <- create_oolong(abstracts_seededlda)
     expect_error(x$do_topic_intrusion_test())
 })
 
 test_that("Can't launch $do_word_set_intrusion_test() when no test content", {
-    x <- create_oolong(abstracts_keyatm)
+    x <- create_oolong(abstracts_seededlda)
     expect_error(x$do_word_set_intrusion_test())
 })
 
 test_that("Can't launch $do_word_intrusion_test() when no test content", {
-    x <- wsi(abstracts_keyatm)
+    x <- wsi(abstracts_seededlda)
     expect_error(x$do_word_intrusion_test())
 })
 
 test_that("hash function", {
     expect_true(is.null(.safe_hash(NULL)))
-    expect_type(.safe_hash(abstracts_keyatm), "character")
+    expect_type(.safe_hash(abstracts_seededlda), "character")
 })
 
 test_that("hash_input_model tm", {
     ## TI
-    x <- create_oolong(abstracts_keyatm)
+    x <- create_oolong(abstracts_seededlda)
     expect_false(is.null(x$.__enclos_env__$private$hash_input_model))
-    expect_equal(x$.__enclos_env__$private$hash_input_model, .safe_hash(abstracts_keyatm))
+    expect_equal(x$.__enclos_env__$private$hash_input_model, .safe_hash(abstracts_seededlda))
     ## WITI
-    x <- create_oolong(abstracts_keyatm, abstracts$text)
+    x <- create_oolong(abstracts_seededlda, abstracts$text)
     expect_false(is.null(x$.__enclos_env__$private$hash_input_model))
     expect_false(is.null(x$.__enclos_env__$private$hash_input_corpus))
-    expect_equal(x$.__enclos_env__$private$hash_input_model, .safe_hash(abstracts_keyatm))
+    expect_equal(x$.__enclos_env__$private$hash_input_model, .safe_hash(abstracts_seededlda))
     expect_equal(x$.__enclos_env__$private$hash_input_corpus, .safe_hash(abstracts$text))
 })
 
 test_that("hash_input_corpus gs", {
     x <- create_oolong(input_corpus = abstracts$text)
     expect_true(is.null(x$.__enclos_env__$private$hash_input_model))
     expect_false(is.null(x$.__enclos_env__$private$hash_input_corpus))
-    expect_equal(x$.__enclos_env__$private$hash_input_corpus, .safe_hash(abstracts$text))    
+    expect_equal(x$.__enclos_env__$private$hash_input_corpus, .safe_hash(abstracts$text))
 })
 
 test_that("invalid type", {
-    expect_error(create_oolong(abstracts_keyatm, type = "1111"))
-    expect_error(create_oolong(abstracts_keyatm, type = NA))
+    expect_error(create_oolong(abstracts_seededlda, type = "1111"))
+    expect_error(create_oolong(abstracts_seededlda, type = NA))
 })
 
 test_that("userid",  {
-    expect_error(wi(abstracts_keyatm, userid = c("a", "b")))
-    expect_error(wi(abstracts_keyatm, userid = "a"), NA)
+    expect_error(wi(abstracts_seededlda, userid = c("a", "b")))
+    expect_error(wi(abstracts_seededlda, userid = "a"), NA)
     expect_error(wsi(abtracts_stm, abstracts$text))
 })