diff --git a/R/PipeOpNMF.R b/R/PipeOpNMF.R index e708af7ea..aa2beb689 100644 --- a/R/PipeOpNMF.R +++ b/R/PipeOpNMF.R @@ -125,10 +125,8 @@ PipeOpNMF = R6Class("PipeOpNMF", track = p_lgl(default = FALSE, tags = c("train", "nmf.options")), verbose = p_uty(default = FALSE, tags = c("train", "nmf.options")), pbackend = p_uty(tags = c("train", "nmf")), # .pbackend - callback = p_uty(tags = c("train", "nmf")) # .callback + callback = p_uty(tags = c("train", "nmf"), depends = keep.all == TRUE) # .callback ) - ps$add_dep("keep.all", on = "nrun", cond = CondLarger$new(1)) - ps$add_dep("callback", on = "keep.all", cond = CondEqual$new(TRUE)) ps$values = list(rank = 2L, method = "brunet", parallel = FALSE, parallel.required = FALSE) super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer"), packages = c("MASS", "NMF")) } @@ -179,10 +177,11 @@ PipeOpNMF = R6Class("PipeOpNMF", mlr_pipeops$add("nmf", PipeOpNMF) -CondLarger = R6Class("CondLarger", inherit = Condition, - public = list( - initialize = function(rhs) super$initialize("larger", rhs), - test = function(x) !is.na(x) & x > self$rhs, - as_string = function(lhs_chr = "x") sprintf("%s > %s", lhs_chr, as.character(self$rhs)) - ) -) +# this is just a really bad idea +## CondLarger = R6Class("CondLarger", inherit = Condition, +## public = list( +## initialize = function(rhs) super$initialize("larger", rhs), +## test = function(x) !is.na(x) & x > self$rhs, +## as_string = function(lhs_chr = "x") sprintf("%s > %s", lhs_chr, as.character(self$rhs)) +## ) +## ) diff --git a/R/PipeOpTextVectorizer.R b/R/PipeOpTextVectorizer.R index 8c4437fc6..eb1531714 100644 --- a/R/PipeOpTextVectorizer.R +++ b/R/PipeOpTextVectorizer.R @@ -187,7 +187,8 @@ PipeOpTextVectorizer = R6Class("PipeOpTextVectorizer", skip = p_uty(default = 0, tags = c("train", "predict", "ngrams"), custom_check = curry(check_integerish, min.len = 1, lower = 0, any.missing = FALSE)), sparsity = p_dbl(lower = 0, upper = 1, default 
= NULL, - tags = c("train", "dfm_trim"), special_vals = list(NULL)), + tags = c("train", "dfm_trim"), special_vals = list(NULL), + depends = return_type == "bow"), termfreq_type = p_fct(default = "count", tags = c("train", "dfm_trim"), levels = c("count", "prop", "rank", "quantile")), min_termfreq = p_dbl(lower = 0, default = NULL, @@ -197,29 +198,21 @@ PipeOpTextVectorizer = R6Class("PipeOpTextVectorizer", scheme_df = p_fct(default = "count", tags = c("train", "docfreq"), levels = c("count", "inverse", "inversemax", "inverseprob", "unary")), - smoothing_df = p_dbl(lower = 0, default = 0, tags = c("train", "docfreq")), - k_df = p_dbl(lower = 0, tags = c("train", "docfreq")), - threshold_df = p_dbl(lower = 0, default = 0, tags = c("train", "docfreq")), - base_df = p_dbl(lower = 0, default = 10, tags = c("train", "docfreq")), + smoothing_df = p_dbl(lower = 0, default = 0, tags = c("train", "docfreq"), depends = scheme_df %in% c("inverse", "inversemax", "inverseprob")), + k_df = p_dbl(lower = 0, tags = c("train", "docfreq"), depends = scheme_df %in% c("inverse", "inversemax", "inverseprob")), + threshold_df = p_dbl(lower = 0, default = 0, tags = c("train", "docfreq"), depends = scheme_df == "count"), + base_df = p_dbl(lower = 0, default = 10, tags = c("train", "docfreq"), + depends = scheme_df %in% c("inverse", "inversemax", "inverseprob")), - scheme_tf = p_fct(default = "count", tags = c("train", "predict", "dfm_weight"), + scheme_tf = p_fct(default = "count", tags = c("train", "predict", "dfm_weight"), depends = return_type == "bow", levels = c("count", "prop", "propmax", "logcount", "boolean", "augmented", "logave")), - k_tf = p_dbl(lower = 0, upper = 1, tags = c("train", "predict", "dfm_weight")), - base_tf = p_dbl(lower = 0, default = 10, tags = c("train", "predict", "dfm_weight")), + k_tf = p_dbl(lower = 0, upper = 1, tags = c("train", "predict", "dfm_weight"), depends = scheme_tf == "augmented"), + base_tf = p_dbl(lower = 0, default = 10, tags = c("train", 
"predict", "dfm_weight"), depends = scheme_tf %in% c("logcount", "logave")), return_type = p_fct(levels = c("bow", "integer_sequence", "factor_sequence"), tags = c("train", "predict")), - sequence_length = p_int(default = 0, lower = 0, upper = Inf, tags = c("train", "predict", "integer_sequence")) - )$ - add_dep("base_df", "scheme_df", CondAnyOf$new(c("inverse", "inversemax", "inverseprob")))$ - add_dep("smoothing_df", "scheme_df", CondAnyOf$new(c("inverse", "inversemax", "inverseprob")))$ - add_dep("k_df", "scheme_df", CondAnyOf$new(c("inverse", "inversemax", "inverseprob")))$ - add_dep("base_df", "scheme_df", CondAnyOf$new(c("inverse", "inversemax", "inverseprob")))$ - add_dep("threshold_df", "scheme_df", CondEqual$new("count"))$ - add_dep("k_tf", "scheme_tf", CondEqual$new("augmented"))$ - add_dep("base_tf", "scheme_tf", CondAnyOf$new(c("logcount", "logave")))$ - add_dep("scheme_tf", "return_type", CondEqual$new("bow"))$ - add_dep("sparsity", "return_type", CondEqual$new("bow"))$ - add_dep("sequence_length", "return_type", CondAnyOf$new(c("integer_sequence", "factor_sequence"))) + sequence_length = p_int(default = 0, lower = 0, upper = Inf, tags = c("train", "predict", "integer_sequence"), + depends = return_type %in% c("integer_sequence", "factor_sequence")) + ) ps$values = list(stopwords_language = "smart", extra_stopwords = character(0), n = 1, scheme_df = "unary", return_type = "bow") super$initialize(id = id, param_set = ps, param_vals = param_vals, packages = c("quanteda", "stopwords"), feature_types = "character") diff --git a/R/PipeOpTuneThreshold.R b/R/PipeOpTuneThreshold.R index 440e2b6df..e4891eb6f 100644 --- a/R/PipeOpTuneThreshold.R +++ b/R/PipeOpTuneThreshold.R @@ -120,7 +120,7 @@ PipeOpTuneThreshold = R6Class("PipeOpTuneThreshold", ps = private$.make_param_set(pred) measure = self$param_set$values$measure if (is.character(measure)) measure = msr(measure) else measure - codomain = do.call(ps, structure(list(p_dbl(tags = ifelse(measure$minimize, 
"minimize", "maximize"))), names = measure$id)) + codomain = do.call(paradox::ps, structure(list(p_dbl(tags = ifelse(measure$minimize, "minimize", "maximize"))), names = measure$id)) objfun = bbotk::ObjectiveRFun$new( fun = function(xs) private$.objfun(xs, pred = pred, measure = measure), diff --git a/R/PipeOpVtreat.R b/R/PipeOpVtreat.R index 473723ebd..8a87e9ba5 100644 --- a/R/PipeOpVtreat.R +++ b/R/PipeOpVtreat.R @@ -136,7 +136,7 @@ PipeOpVtreat = R6Class("PipeOpVtreat", smFactor = p_dbl(lower = 0, upper = Inf, default = 0, tags = c("train", "regression", "classification", "multinomial")), rareCount = p_int(lower = 0L, upper = Inf, default = 0, tags = c("train", "regression", "classification", "multinomial")), rareSig = p_dbl(lower = 0, upper = 1, special_vals = list(NULL), tags = c("train", "regression", "classification", "multinomial")), # default NULL for regression, classification, 1 for multinomial - collarProb = p_dbl(lower = 0, upper = 1, default = 0, tags = c("train", "regression", "classification", "multinomial")), + collarProb = p_dbl(lower = 0, upper = 1, default = 0, tags = c("train", "regression", "classification", "multinomial"), depends = doCollar == TRUE), doCollar = p_lgl(default = FALSE, tags = c("train", "regression", "classification", "multinomial")), codeRestriction = p_uty(default = NULL, custom_check = function(x) checkmate::check_character(x, any.missing = FALSE, null.ok = TRUE), tags = c("train", "regression", "classification", "multinomial")), @@ -160,7 +160,6 @@ PipeOpVtreat = R6Class("PipeOpVtreat", imputation_map = p_uty(default = NULL, custom_check = function(x) checkmate::check_list(x, null.ok = TRUE), tags = c("train", "predict")) # NOTE: parallelCluster missing intentionally and will be set to NULL ) - ps$add_dep("collarProb", on = "doCollar", cond = CondEqual$new(TRUE)) ps$values = list(recommended = TRUE, cols_to_copy = selector_none()) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "vtreat", tags = 
c("encode", "missings")) } diff --git a/tests/testthat/helper_test_pipeops.R b/tests/testthat/helper_test_pipeops.R index 0589a0791..5265177bb 100644 --- a/tests/testthat/helper_test_pipeops.R +++ b/tests/testthat/helper_test_pipeops.R @@ -45,7 +45,7 @@ PipeOpDebugMulti = R6Class("PipeOpDebugMulti", p = ps(par = p_int(lower = 0, upper = 10, default = 0, tags = c("train", "predict"))) self$nin = length(inputs) self$nout = length(outputs) - super$initialize(id, p), + super$initialize(id, param_set = p, input = data.table(name = inputs, train = "*", predict = "*"), output = data.table(name = outputs, train = "*", predict = "*")) }),