Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #81 #84

Merged
merged 5 commits into from
Feb 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
^CRAN-RELEASE$
^cran-comments\.md$
^data/abstracts_topicmodels\.rda$
^data/abstracts_seededlda\.rda$
^data/abstracts_unseededlda\.rda$
^data/abstracts_keyatm\.rda$
^data/abstracts_warplda\.rda$
^data/abstracts_stm\.rda$
^tests/testthat/apps/
Expand Down
12 changes: 6 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: oolong
Title: Create Validation Tests for Automated Content Analysis
Version: 0.5.1
Version: 0.6.0
Authors@R:
c(person(given = "Chung-hong", family = "Chan", role = c("aut", "cre"), email = "[email protected]", comment = c(ORCID = "0000-0002-6232-7530")),
person(given = "Marius", family = "Sältzer", role = c("aut"), email = "[email protected]", comment = c(ORCID = "0000-0002-8604-4666")))
Expand All @@ -10,9 +10,9 @@ Encoding: UTF-8
URL: https://gesistsa.github.io/oolong, https://github.com/gesistsa/oolong
LazyData: true
Depends:
R (>= 4.0)
Imports:
keyATM (>= 0.2.2),
R (>= 3.5.0)
Imports:
seededlda,
purrr,
tibble,
shiny,
Expand All @@ -26,14 +26,14 @@ Imports:
stats,
utils
RoxygenNote: 7.3.1
Suggests:
Suggests:
keyATM (>= 0.2.2),
testthat (>= 3.0.2),
text2vec (>= 0.6),
BTM,
dplyr,
topicmodels,
stm,
seededlda,
covr,
stringr,
knitr,
Expand Down
16 changes: 9 additions & 7 deletions R/oolong.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,21 +71,23 @@ Oolong_test <- R6::R6Class(
#' @return an oolong test object.
#' @examples
#' ## Creation of oolong test with only word intrusion test
#' data(abstracts_keyatm)
#' data(abstracts_seededlda)
#' data(abstracts)
#' oolong_test <- wi(input_model = abstracts_keyatm, userid = "Hadley")
#' oolong_test <- wi(input_model = abstracts_seededlda, userid = "Hadley")
#' ## Creation of oolong test with both word intrusion test and topic intrusion test
#' oolong_test <- witi(input_model = abstracts_keyatm, input_corpus = abstracts$text, userid = "Julia")
#' oolong_test <- witi(input_model = abstracts_seededlda,
#' input_corpus = abstracts$text, userid = "Julia")
#' ## Creation of oolong test with topic intrusion test
#' oolong_test <- ti(input_model = abstracts_keyatm, input_corpus = abstracts$text, userid = "Jenny")
#' oolong_test <- ti(input_model = abstracts_seededlda,
#' input_corpus = abstracts$text, userid = "Jenny")
#' ## Creation of oolong test with word set intrusion test
#' oolong_test <- wsi(input_model = abstracts_keyatm, userid = "Garrett")
#' oolong_test <- wsi(input_model = abstracts_seededlda, userid = "Garrett")
#' ## Creation of gold standard
#' oolong_test <- gs(input_corpus = trump2k, userid = "Yihui")
#' ## Using create_oolong(); not recommended
#' oolong_test <- create_oolong(input_model = abstracts_keyatm,
#' oolong_test <- create_oolong(input_model = abstracts_seededlda,
#' input_corpus = abstracts$text, userid = "JJ")
#' oolong_test <- create_oolong(input_model = abstracts_keyatm,
#' oolong_test <- create_oolong(input_model = abstracts_seededlda,
#' input_corpus = abstracts$text, userid = "Mara", type = "ti")
#' oolong_test <- create_oolong(input_corpus = abstracts$text, userid = "Winston", type = "gs")
#' @author Chung-hong Chan, Marius Sältzer
Expand Down
8 changes: 4 additions & 4 deletions R/oolong_btm.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
K <- input_model$K
better_theta <- dirty_theta[match(quanteda::docid(input_corpus), row.names(dirty_theta)),]
## replace NA value with ambiguous theta, i.e. 1/K
better_theta[is.na(better_theta)] <- 1/K
better_theta[is.na(better_theta)] <- 1 / K
rownames(better_theta) <- quanteda::docid(input_corpus)
return(better_theta)
}
Expand All @@ -13,19 +13,19 @@
input_model <- input_model_s3$model
K <- input_model$K
V <- input_model$W
terms <- t(apply(input_model$phi, MARGIN = 2, FUN = function(x){
terms <- t(apply(input_model$phi, MARGIN = 2, FUN = function(x) {
x <- data.frame(token = names(x), probability = x)
x <- x[order(x$probability, decreasing = TRUE), ]
x <- x$token
head(x, n = length(x))
}))
all_terms <- unique(as.vector(terms[,seq_len(n_top_terms)]))
if (need_topic) {
if (is.null(input_corpus) | is.null(btm_dataframe) | !"corpus" %in% class(input_corpus)) {
if (is.null(input_corpus) || is.null(btm_dataframe) || !"corpus" %in% class(input_corpus)) {
.cstop(TRUE, "You need to provide input_corpus (in quanteda format) and btm_dataframe for generating topic intrusion tests.")
}
model_terms <- terms[, seq_len(n_topiclabel_words)]
theta <- .generate_btm_theta(input_model, btm_dataframe, input_corpus)
theta <- .generate_btm_theta(input_model, btm_dataframe, input_corpus)
} else {
model_terms <- NULL
theta <- NULL
Expand Down
7 changes: 3 additions & 4 deletions R/oolong_data_misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
#' Topic models trained with the abstracts dataset.
#'
#' These are topic models trained with different topic model packages.
"abstracts_keyatm"
"abstracts_seededlda"

#' @rdname abstracts_keyatm
#' @rdname abstracts_seededlda
"abstracts_btm"

#' AFINN dictionary
Expand All @@ -44,7 +44,7 @@
#' @importFrom quanteda print corpus
NULL

utils::globalVariables(c('cookd', 'diffxy', 'index', 'meanxy', 'word_length', 'avg_answer', 'abstracts_keyatm', 'abstracts'))
utils::globalVariables(c('cookd', 'diffxy', 'index', 'meanxy', 'word_length', 'avg_answer', 'abstracts_seededlda', 'abstracts'))

### print the ... if boolean_test is true
.cp <- function(boolean_test, ...) {
Expand Down Expand Up @@ -89,4 +89,3 @@ utils::globalVariables(c('cookd', 'diffxy', 'index', 'meanxy', 'word_length', 'a
}
return(digest::digest(x, algo = "sha1"))
}

4 changes: 2 additions & 2 deletions R/oolong_update.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ update_oolong <- function(oolong, verbose = TRUE) {
.cstop(!.check_oolong(oolong$.__enclos_env__$private), "This oolong object does not need to be updated.")
if ("oolong_test_tm" %in% class(oolong)) {
## generate a dummy oolong object
new_oolong <- create_oolong(abstracts_keyatm)
new_oolong <- create_oolong(abstracts_seededlda)
new_oolong$.__enclos_env__$private$finalized <- oolong$.__enclos_env__$private$finalized
new_oolong$.__enclos_env__$private$test_content <- oolong$.__enclos_env__$private$test_content
## renaming test_content
Expand All @@ -65,7 +65,7 @@ update_oolong <- function(oolong, verbose = TRUE) {
new_oolong$.__enclos_env__$private$construct <- oolong$.__enclos_env__$private$construct
new_oolong$.__enclos_env__$private$hash <- .safe_hash(new_oolong$.__enclos_env__$private$test_content)
new_oolong$.__enclos_env__$private$hash_input_corpus <- oolong$.__enclos_env__$private$hash_input_corpus
new_oolong$.__enclos_env__$private$meta <- .generate_meta()
new_oolong$.__enclos_env__$private$meta <- .generate_meta()
}
if (is.null(new_oolong$userid)) {
new_oolong$userid <- NA
Expand Down
8 changes: 4 additions & 4 deletions man/abstracts_keyatm.Rd → man/abstracts_seededlda.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 9 additions & 7 deletions man/create_oolong.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
library(testthat)
library(oolong)

test_check("oolong", report = "minimal")
test_check("oolong")
4 changes: 2 additions & 2 deletions tests/testthat/_snaps/printing.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
# ti only

Code
create_oolong(input_model = abstracts_keyatm, input_corpus = abstracts$text,
create_oolong(input_model = abstracts_seededlda, input_corpus = abstracts$text,
type = "ti")
Message

Expand All @@ -88,7 +88,7 @@
# wsi only

Code
create_oolong(input_model = abstracts_keyatm, input_corpus = abstracts$text,
create_oolong(input_model = abstracts_seededlda, input_corpus = abstracts$text,
type = "wsi", wsi_n_top_terms = 100)
Message

Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/apps/ti/app.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
library(oolong)

x <- ti(abstracts_keyatm, abstracts$text, exact_n = 10)
x <- ti(abstracts_seededlda, abstracts$text, exact_n = 10)

x$do_topic_intrusion_test()
2 changes: 1 addition & 1 deletion tests/testthat/apps/wi/app.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
library(oolong)

x <- wi(abstracts_keyatm)
x <- wi(abstracts_seededlda)

x$do_word_intrusion_test()
2 changes: 1 addition & 1 deletion tests/testthat/apps/wsi/app.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
require(oolong)

x <- wsi(abstracts_keyatm)
x <- wsi(abstracts_seededlda)

x$do_word_set_intrusion_test()
50 changes: 25 additions & 25 deletions tests/testthat/test-defensive_programming.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ test_that("precondiction", {

test_that("locking", {
## premature locking
x <- create_oolong(abstracts_keyatm)
x <- create_oolong(abstracts_seededlda)
expect_error(x$lock())
expect_error(x$lock(force = TRUE), NA)
x <- create_oolong(abstracts_keyatm, abstracts$text)
x <- create_oolong(abstracts_seededlda, abstracts$text)
expect_error(x$lock())
## error when only word intrusion test is done.
x <- genius_word(x)
Expand All @@ -32,16 +32,16 @@ test_that("locking", {
})

test_that("cloning", {
x <- create_oolong(abstracts_keyatm)
x <- create_oolong(abstracts_seededlda)
expect_error(clone_oolong(x), NA)
x <- genius_word(x)
## Cannot clone a partially coded object.
expect_error(clone_oolong(x))
x <- create_oolong(abstracts_keyatm)
x <- create_oolong(abstracts_seededlda)
x$lock(force = TRUE)
expect_error(clone_oolong(x))
## Cloned object is not sharing the same private space.
x <- create_oolong(abstracts_keyatm)
x <- create_oolong(abstracts_seededlda)
y <- clone_oolong(x)
x$lock(force = TRUE)
expect_true(x$.__enclos_env__$private$finalized)
Expand All @@ -50,88 +50,88 @@ test_that("cloning", {

test_that("cloning all types", {
## pure wi
x <- wi(abstracts_keyatm)
x <- wi(abstracts_seededlda)
expect_error(clone_oolong(x), NA)
x$.__enclos_env__$private$test_content$wi$answer[1] <- "x"
expect_error(clone_oolong(x))
## pure ti
x <- ti(abstracts_keyatm, abstracts$text)
x <- ti(abstracts_seededlda, abstracts$text)
expect_error(clone_oolong(x), NA)
x$.__enclos_env__$private$test_content$ti$answer[1] <- "x"
expect_error(clone_oolong(x))
## witi
x <- witi(abstracts_keyatm, abstracts$text)
x <- witi(abstracts_seededlda, abstracts$text)
expect_error(clone_oolong(x), NA)
x$.__enclos_env__$private$test_content$ti$answer[1] <- "x"
expect_error(clone_oolong(x))
x <- witi(abstracts_keyatm, abstracts$text)
x <- witi(abstracts_seededlda, abstracts$text)
expect_error(clone_oolong(x), NA)
x$.__enclos_env__$private$test_content$wi$answer[1] <- "x"
expect_error(clone_oolong(x))
x <- witi(abstracts_keyatm, abstracts$text)
x <- witi(abstracts_seededlda, abstracts$text)
expect_error(clone_oolong(x), NA)
x$.__enclos_env__$private$test_content$wi$answer[1] <- "x"
x$.__enclos_env__$private$test_content$ti$answer[1] <- "x"
expect_error(clone_oolong(x))
## wsi
x <- wsi(abstracts_keyatm)
x <- wsi(abstracts_seededlda)
expect_error(clone_oolong(x), NA)
x$.__enclos_env__$private$test_content$wsi$answer[1] <- "x"
expect_error(clone_oolong(x))
## gs
x <- gs(abstracts$text)
expect_error(clone_oolong(x), NA)
x$.__enclos_env__$private$test_content$gs$answer[1] <- 1
expect_error(clone_oolong(x))
expect_error(clone_oolong(x))
})

test_that("Can't launch $do_topic_intrusion_test() when no test content", {
x <- create_oolong(abstracts_keyatm)
x <- create_oolong(abstracts_seededlda)
expect_error(x$do_topic_intrusion_test())
})

test_that("Can't launch $do_word_set_intrusion_test() when no test content", {
x <- create_oolong(abstracts_keyatm)
x <- create_oolong(abstracts_seededlda)
expect_error(x$do_word_set_intrusion_test())
})

test_that("Can't launch $do_word_intrusion_test() when no test content", {
x <- wsi(abstracts_keyatm)
x <- wsi(abstracts_seededlda)
expect_error(x$do_word_intrusion_test())
})

test_that("hash function", {
expect_true(is.null(.safe_hash(NULL)))
expect_type(.safe_hash(abstracts_keyatm), "character")
expect_type(.safe_hash(abstracts_seededlda), "character")
})

test_that("hash_input_model tm", {
## TI
x <- create_oolong(abstracts_keyatm)
x <- create_oolong(abstracts_seededlda)
expect_false(is.null(x$.__enclos_env__$private$hash_input_model))
expect_equal(x$.__enclos_env__$private$hash_input_model, .safe_hash(abstracts_keyatm))
expect_equal(x$.__enclos_env__$private$hash_input_model, .safe_hash(abstracts_seededlda))
## WITI
x <- create_oolong(abstracts_keyatm, abstracts$text)
x <- create_oolong(abstracts_seededlda, abstracts$text)
expect_false(is.null(x$.__enclos_env__$private$hash_input_model))
expect_false(is.null(x$.__enclos_env__$private$hash_input_corpus))
expect_equal(x$.__enclos_env__$private$hash_input_model, .safe_hash(abstracts_keyatm))
expect_equal(x$.__enclos_env__$private$hash_input_model, .safe_hash(abstracts_seededlda))
expect_equal(x$.__enclos_env__$private$hash_input_corpus, .safe_hash(abstracts$text))
})

test_that("hash_input_corpus gs", {
x <- create_oolong(input_corpus = abstracts$text)
expect_true(is.null(x$.__enclos_env__$private$hash_input_model))
expect_false(is.null(x$.__enclos_env__$private$hash_input_corpus))
expect_equal(x$.__enclos_env__$private$hash_input_corpus, .safe_hash(abstracts$text))
expect_equal(x$.__enclos_env__$private$hash_input_corpus, .safe_hash(abstracts$text))
})

test_that("invalid type", {
expect_error(create_oolong(abstracts_keyatm, type = "1111"))
expect_error(create_oolong(abstracts_keyatm, type = NA))
expect_error(create_oolong(abstracts_seededlda, type = "1111"))
expect_error(create_oolong(abstracts_seededlda, type = NA))
})

test_that("userid", {
expect_error(wi(abstracts_keyatm, userid = c("a", "b")))
expect_error(wi(abstracts_keyatm, userid = "a"), NA)
expect_error(wi(abstracts_seededlda, userid = c("a", "b")))
expect_error(wi(abstracts_seededlda, userid = "a"), NA)
expect_error(wsi(abtracts_stm, abstracts$text))
})
Loading
Loading