diff --git a/NAMESPACE b/NAMESPACE index d105db3..4a9205e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,12 +1,12 @@ # Generated by roxygen2: do not edit by hand -export(compute_centroids_and_proba) export(compute_density_ratio) export(create_models_tuning) -export(distance_to_gamma) -export(distinct_gamma) +export(distance_to_prototypes) +export(distinct_prototypes) export(estim_denom_centroid) export(estim_num_centroid) +export(find_prototypes) export(fit_metamodel) export(get_cell_numbers) export(get_probas) @@ -15,7 +15,6 @@ export(predict_outputs) export(probas_k_fold) export(probas_loo) export(probas_training_test) -export(proto_map_algo) export(quanti_error) export(rf_pred_k_fold) export(rf_pred_training_test) @@ -26,7 +25,7 @@ export(rf_rmse_training_test) export(rmse_k_fold) export(rmse_loo) export(rmse_training_test) -export(sort_gamma) +export(sort_prototypes) export(std_centroid) export(std_proba) import(DiceKriging) diff --git a/R/1d_to_matrix.R b/R/1d_to_matrix.R index 1f92e4a..16fdfc0 100644 --- a/R/1d_to_matrix.R +++ b/R/1d_to_matrix.R @@ -1,15 +1,15 @@ #' @title Transform every 1D prototype into a column matrix #' -#' @param gamma a list of prototypes +#' @param prototypes a list of prototypes #' #' @return A list of matrices prototypes #' @export #' #' @examples #' oned_to_matrix(list(1:5, runif(5), rep(0,5))) -oned_to_matrix = function(gamma){ - gamma = lapply(1:length(gamma), function(j){ - if(is.null(dim(gamma[[j]])) | length(dim(gamma[[j]])) == 1){t(as.matrix(gamma[[j]]))} - else{gamma[[j]]}}) - return(gamma) +oned_to_matrix = function(prototypes){ + prototypes = lapply(1:length(prototypes), function(j){ + if(is.null(dim(prototypes[[j]])) | length(dim(prototypes[[j]])) == 1){t(as.matrix(prototypes[[j]]))} + else{prototypes[[j]]}}) + return(prototypes) } diff --git a/R/compute_centroids_and_proba.R b/R/compute_centroids_and_proba.R index 609eb54..2f9cf3e 100644 --- a/R/compute_centroids_and_proba.R +++ b/R/compute_centroids_and_proba.R @@ -1,39 +1,34 @@ #' @title Compute the centroid and the probability mass of the Voronoï cells #' -#' @param outputs The output samples that need to be quantized. If method = "percell", a list of output samples must be provided, of length equal to the number of Voronoï cells. -#' @param cell_numbers The voronoi cell number of every output +#' @param data The data that needs to be quantized. If method = "percell", a list of data samples must be provided, of length equal to the number of Voronoï cells. +#' @param cell_numbers The voronoi cell number of every data element #' @param method_IS The method of Importance Sampling : "unique" means there is a unique biased density involved, "percell" means there is one biased density (and then one biased sample) for each cell. -#' @param density_ratio A vector indicating the weight fX/g of each output. Default is a vector of 1. If method = "percell", a list of density_ratio must be provided, of length equal to the number of Voronoï cells. +#' @param density_ratio A vector indicating the weight fX/g of each data element. Default is a vector of 1. If method = "percell", a list of density_ratio must be provided, of length equal to the number of Voronoï cells. #' @param bias A vector indicating the bias that came out when computing the importance sampling estimators of the membership probabilities. Each element of the vector is associated to a Voronoi cell. Default is 0 for all Voronoi cells. +#' @param batch A boolean indicating whether the computations must be performed by batch or not. 
If TRUE, data, cell_numbers and density_ratio must be lists. Default is False. #' @return The centroid and the probability mass of each probability cell -#' @export #' @import abind -#' @examples -#' outputs = array(runif(9*20)*15, dim = c(3,3,20)) -#' cell_numbers = c(1,3,2,1,2,1,1,2,3,3,2,2,2,2,2,3,1,1,3,3) -#' density_ratio = rep(1,20) -#' compute_centroids_and_proba(outputs = outputs,cell_numbers = cell_numbers, -#' density_ratio = density_ratio) -compute_centroids_and_proba = function(outputs, cell_numbers, method_IS = "unique", density_ratio = rep(1, dim(outputs)[length(dim(outputs))]), bias = rep(0,length(unique(unlist(cell_numbers))))){ - n = length(cell_numbers)#nb of outputs +compute_centroids_and_proba = function(data, cell_numbers, method_IS = "unique", density_ratio = rep(1, dim(data)[length(dim(data))]), bias = rep(0,length(unique(unlist(cell_numbers)))), batch = FALSE){ + n = length(unlist(cell_numbers)) nb_cells = length(unique(unlist(cell_numbers))) centroids = list() probas = c() for(j in 1:nb_cells){ if(method_IS == "unique"){ - outputs_j = outputs + data_j = data cell_numbers_j = cell_numbers density_j = density_ratio } else if(method_IS == "percell"){ - outputs_j = outputs[[j]] + data_j = data[[j]] cell_numbers_j = cell_numbers[[j]] density_j = density_ratio[[j]] } - numerator = estim_num_centroid(outputs = outputs_j, cell_numbers = cell_numbers_j, density_ratio = density_j, cell = j) ## Sum the Y(X)f/nu of the cell - denominator = estim_denom_centroid(density_ratio = density_j, cell_numbers = cell_numbers_j, cell = j, bias = bias[j]) + numerator = estim_num_centroid(data = data_j, cell_numbers = cell_numbers_j, density_ratio = density_j, cell = j, batch = batch) ## Sum the Y(X)f/nu of the cell + if(batch){denominator = sum(Vectorize(function(p){estim_denom_centroid(density_ratio = density_j[[p]], cell_numbers = cell_numbers_j[[p]], cell = j, bias = bias[j])})(1:length(density_j)))} + else{denominator = estim_denom_centroid(density_ratio = density_j, cell_numbers = cell_numbers_j, cell = j, bias = bias[j])} centroids[[j]] = numerator/denominator probas = c(probas, denominator/n) } diff --git a/R/create_model_tuning.R b/R/create_models_tuning.R similarity index 100% rename from R/create_model_tuning.R rename to R/create_models_tuning.R diff --git a/R/density_ratio.R b/R/density_ratio.R index b79e344..992ead9 100644 --- a/R/density_ratio.R +++ b/R/density_ratio.R @@ -5,6 +5,7 @@ #' @param inputs The value of the sampled inputs #' #' @return A vector with the weights fX/g of the inputs +#' @import foreach #' @export #' #' @examples @@ -13,8 +14,8 @@ #' inputs = array(rnorm(30), dim = c(10,3)) #' compute_density_ratio(f,g, inputs) compute_density_ratio = function(f, g, inputs){ - res = foreach(i = 1:nrow(inputs), .combine = 'c')%dopar%{ - as.numeric(f(inputs[i,])/g(inputs[i,])) + res = foreach(it = 1:nrow(inputs), .combine = 'c')%dopar%{ + as.numeric(f(inputs[it,])/g(inputs[it,])) } return(res) } diff --git a/R/distance2gamma.R b/R/distance2gamma.R deleted file mode 100644 index 9966b16..0000000 --- a/R/distance2gamma.R +++ /dev/null @@ -1,17 +0,0 @@ -#' @title Compute the distance between a point and its nearest centroid, returning this distance and the associated cell number -#' -#' @param x A point in the output space -#' @param gamma A set of prototypes -#' @param distance_func A function computing a distance between two elements in the output spaces -#' -#' @return The distance between a point and its nearest centroid -#' @export -#' -#' @examples -#' 
distance_to_gamma(array(1:9, dim = c(3,3)), list(array(10, dim = c(3,3)), -#' array(5, dim = c(3,3)), array(6, dim = c(3,3))), -#' distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))}) -distance_to_gamma = function(x, gamma, distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))}){ - distance = Vectorize(function(k){distance_func(x, gamma[[k]])})(1:length(gamma)) - return(list(cellule = which.min(distance), dist = min(distance))) -} diff --git a/R/distance_to_prototypes.R b/R/distance_to_prototypes.R new file mode 100644 index 0000000..93bcd53 --- /dev/null +++ b/R/distance_to_prototypes.R @@ -0,0 +1,17 @@ +#' @title Compute the distance between a point and its nearest prototype, returning this distance and the associated cell number +#' +#' @param x A point in the space of the data elements +#' @param prototypes A set of prototypes +#' @param distance_func A function computing a distance between two data elements +#' +#' @return The distance between a point and its nearest centroid +#' @export +#' +#' @examples +#' distance_to_prototypes(array(1:9, dim = c(3,3)), list(array(10, dim = c(3,3)), +#' array(5, dim = c(3,3)), array(6, dim = c(3,3))), +#' distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))}) +distance_to_prototypes = function(x, prototypes, distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))}){ + distance = Vectorize(function(k){distance_func(x, prototypes[[k]])})(1:length(prototypes)) + return(list(cellule = which.min(distance), dist = min(distance))) +} diff --git a/R/distinct_gamma.R b/R/distinct_gamma.R deleted file mode 100644 index cfe403f..0000000 --- a/R/distinct_gamma.R +++ /dev/null @@ -1,16 +0,0 @@ -#' @title Check that all the elements in gamma are distinct -#' -#' @param gamma A list of prototypes -#' -#' @return A boolean indicating whether the elements in gamma are distinct -#' @export -#' -#' @examples -#' distinct_gamma(list(1,2,34,1)) -distinct_gamma = function(gamma){ - for(i in 1:(length(gamma)-1)){ - dist_gamma = distance_to_gamma(gamma[[i]], lapply((i+1):length(gamma), function(j){gamma[[j]]}))$dist - if(dist_gamma == 0){return(FALSE)} - } - return(TRUE) -} diff --git a/R/distinct_prototypes.R b/R/distinct_prototypes.R new file mode 100644 index 0000000..9a71fef --- /dev/null +++ b/R/distinct_prototypes.R @@ -0,0 +1,16 @@ +#' @title Check that all the elements in a list of prototypes are distinct +#' +#' @param prototypes A list of prototypes +#' +#' @return A boolean indicating whether the elements in prototypes are distinct +#' @export +#' +#' @examples +#' distinct_prototypes(list(1,2,34,1)) +distinct_prototypes = function(prototypes){ + for(i in 1:(length(prototypes)-1)){ + dist_prototypes = distance_to_prototypes(prototypes[[i]], lapply((i+1):length(prototypes), function(j){prototypes[[j]]}))$dist + if(dist_prototypes == 0){return(FALSE)} + } + return(TRUE) +} diff --git a/R/error_quanti.R b/R/error_quanti.R index 0be082a..cf6678f 100644 --- a/R/error_quanti.R +++ b/R/error_quanti.R @@ -1,24 +1,32 @@ #' @title Compututation of the empirical quantization error #' -#' @param outputs The output samples that need to be quantized. Useful only if cell_numbers == NULL. -#' @param gamma A set of prototypes. Useful only if cell_numbers == NULL. -#' @param density_ratio density_ratio indicates the weight fX/g of each output -#' @param distance_func A function computing a distance between two elements in the output spaces. Useful only if cell_numbers == NULL. - +#' @param data The data that needs to be quantized. 
Useful only if cell_numbers == NULL. +#' @param prototypes A set of prototypes. Useful only if cell_numbers == NULL. +#' @param density_ratio density_ratio indicates the weight fX/g of each data element. +#' @param distance_func A function computing a distance between two data elements. Useful only if cell_numbers == NULL. +#' @param batch A boolean indicating whether the computations must be performed by batch or not. If TRUE, data, cell_numbers and density_ratio must be lists. Default is False. +#' #' @return An estimation of the quantization error #' @export #' @import abind #' @examples -#' gamma = list(array(10, dim = c(3,3)), array(5, dim = c(3,3)), array(6, dim = c(3,3))) -#' outputs = array(runif(9*20)*20, dim = c(3,3,20)) +#' prototypes = list(array(10, dim = c(3,3)), array(5, dim = c(3,3)), array(6, dim = c(3,3))) +#' data = array(runif(9*20)*20, dim = c(3,3,20)) #' density_ratio = rep(1,20) #' distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))} -#' quanti_error(outputs = outputs, gamma = gamma, density_ratio = density_ratio, +#' quanti_error(data = data, prototypes = prototypes, density_ratio = density_ratio, #' distance_func = distance_func) -quanti_error = function(outputs, gamma, density_ratio, distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))}){ +quanti_error = function(data, prototypes, density_ratio, batch = FALSE, distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))}){ drop = "selected" - if(sum(dim(gamma[[1]]) == 1) == length(dim(gamma[[1]]))){drop = FALSE} - distances = Vectorize(function(it){distance_to_gamma(x = asub(x = outputs, dims = length(dim(outputs)), idx = it,drop = drop), gamma = gamma, distance_func = distance_func)$dist})(1:dim(outputs)[length(dim(outputs))]) - return(sqrt(mean(distances^2*density_ratio))) + if(sum(dim(prototypes[[1]]) == 1) == length(dim(prototypes[[1]]))){drop = FALSE} + if(!batch){ + distances = Vectorize(function(it){distance_to_prototypes(x = asub(x = data, dims = length(dim(data)), idx = it,drop = drop), prototypes = prototypes, distance_func = distance_func)$dist})(1:dim(data)[length(dim(data))]) + res = sqrt(mean(distances^2*density_ratio)) + } + else{ + distances = as.numeric(sapply(1:length(data), function(b){Vectorize(function(it){distance_to_prototypes(x = asub(x = data[[b]], dims = length(dim(data[[b]])), idx = it,drop = drop), prototypes = prototypes, distance_func = distance_func)$dist})(1:dim(data[[b]])[length(dim(data[[b]]))])})) + res = sqrt(mean(distances^2*unlist(density_ratio))) + } + return(res) } diff --git a/R/estim_denom_centroid.R b/R/estim_denom_centroid.R index 7ab4dc8..a64b6cf 100644 --- a/R/estim_denom_centroid.R +++ b/R/estim_denom_centroid.R @@ -1,7 +1,7 @@ #' Title Compute the estimator which is the denominator of the centroid estimation #' -#' @param density_ratio density_ratio indicates the weight fX/g of each output -#' @param cell_numbers The output samples that need to be quantized +#' @param density_ratio density_ratio indicates the weight fX/g of each data element +#' @param cell_numbers The voronoi cell number of every data element. #' @param cell The cell number of the computed centroid #' @param bias A number indicating the bias that came out when computing the importance sampling estimators of the membership probabilities of the Voronoi cell. Default is 0. 
#' diff --git a/R/estim_num_centroid.R b/R/estim_num_centroid.R index d2fdacf..2088bf6 100644 --- a/R/estim_num_centroid.R +++ b/R/estim_num_centroid.R @@ -1,27 +1,41 @@ #' @title Compute the estimator which is the numerator of the centroid estimation #' -#' @param outputs The output samples that need to be quantized -#' @param cell_numbers The voronoi cell number of every output -#' @param density_ratio density_ratio indicates the weight fX/g of each output +#' @param data The data that needs to be quantized +#' @param cell_numbers The voronoi cell number of every data element +#' @param density_ratio density_ratio indicates the weight fX/g of each data element #' @param cell The cell number of the computed centroid -#' -#' @return An array having the same dimension as an output, which is the numerator of the centroid estimator +#' @param batch A boolean indicating whether the computations must be performed by batch or not. If TRUE, data, cell_numbers and density_ratio must be lists. Default is False. +#' @return An array having the same dimension as a data element, which is the numerator of the centroid estimator #' @export #' @import abind #' @examples -#' outputs = array(runif(9*20)*15, dim = c(3,3,20)) +#' data = array(runif(9*20)*15, dim = c(3,3,20)) #' cell_numbers = c(1,3,2,1,2,1,1,2,3,3,2,2,2,2,2,3,1,1,3,3) #' density_ratio = rep(1,20) #' cell = 3 -#' estim_num_centroid(outputs = outputs,cell_numbers = cell_numbers, +#' estim_num_centroid(data = data,cell_numbers = cell_numbers, #' density_ratio = density_ratio, cell = cell) -estim_num_centroid = function(outputs, cell_numbers, density_ratio, cell){ - outputs_cell = asub(x = outputs, dims = length(dim(outputs)), idx = which(cell_numbers == cell)) - outputs_cell = matrix(outputs_cell, nrow = prod(dim(outputs)[1:(length(dim(outputs))-1)])) - outputs_cell = t(outputs_cell) - res = outputs_cell*density_ratio[cell_numbers == cell] - res = apply(res,2,sum) - res = array(res, dim = dim(outputs)[1:(length(dim(outputs))-1)]) +estim_num_centroid = function(data, cell_numbers, density_ratio, cell, batch = FALSE){ + if(batch){ + res = 0 + for(batch_i in 1:length(data)){ + data_cell = asub(x = data[[batch_i]], dims = length(dim(data[[batch_i]])), idx = which(cell_numbers[[batch_i]] == cell)) + data_cell = matrix(data_cell, nrow = prod(dim(data[[batch_i]])[1:(length(dim(data[[batch_i]]))-1)])) + data_cell = t(data_cell) + data_cell = data_cell*density_ratio[[batch_i]][cell_numbers[[batch_i]] == cell] + res = res + apply(data_cell,2,sum) + } + res = array(res, dim = dim(data[[1]])[1:(length(dim(data[[1]]))-1)]) + + } + else{ + data_cell = asub(x = data, dims = length(dim(data)), idx = which(cell_numbers == cell)) + data_cell = matrix(data_cell, nrow = prod(dim(data)[1:(length(dim(data))-1)])) + data_cell = t(data_cell) + data_cell = data_cell*density_ratio[cell_numbers == cell] + res = apply(data_cell,2,sum) + res = array(res, dim = dim(data)[1:(length(dim(data))-1)]) + } return(res) } diff --git a/R/find_prototypes.R b/R/find_prototypes.R new file mode 100644 index 0000000..95e4354 --- /dev/null +++ b/R/find_prototypes.R @@ -0,0 +1,93 @@ + +#' @title The algorithm to find prototypes with multistart. Providing prototype data and their probability masses. #' @param starting_proto Optional. If multistart = 1, starting_proto is a list of initial prototypes. Else, starting_proto is a list of lists of prototypes, whose length is equal to multistart. +#' @param nb_cells Required only if starting_proto is NULL. 
Indicates the number of prototypes of the quantization. +#' @param data The data that needs to be quantized. If method = "percell", a list of data samples must be provided, of length equal to the number of Voronoï cells. +#' @param multistart Number of starts of the algorithm +#' @param method_IS The method of Importance Sampling : "unique" means there is a unique biased density involved, "percell" means there is one biased density (and then one biased sample) for each cell. Default is "unique". +#' @param density_ratio A vector indicating the weight fX/g of each data element. Default is a vector of 1. If method = "percell", a list of density_ratio must be provided, of length equal to the number of Voronoï cells. +#' @param budget The maximum number of iterations of the algorithm. Default is 10^3. +#' @param distance_func A function computing a distance between two data elements. +#' @param print_progress A boolean indicating whether to print the progress through the start numbers. Default is FALSE. +#' @param threshold A real positive number. When the distance between the new centroids and the previous ones is lower than this value, then we stop the algorithm. +#' @param trace A boolean. If TRUE, tracing information on the progress of the algorithm is produced. Default is FALSE. +#' @param bias A vector indicating the bias that came out when computing the importance sampling estimators of the membership probabilities. Each element of the vector is associated to a Voronoi cell. Default is 0 for all Voronoi cells. +#' @param index_sampling_error Required only if method_IS = "percell". Indicates which of the data samples must be used for the computation of the quantization error. +#' @param all_starts A boolean indicating whether the function should return the optimal prototypes obtained for each start. +#' @param seed An optional random seed. +#' @param batch A boolean indicating whether the computations must be performed by batch or not. If TRUE, data, cell_numbers and density_ratio must be lists. Default is False. + +#' @return A list containing : +#' - prototypes : the list of optimal prototypes +#' - probas : a vector indicating the probability mass of the prototypes +#' - cell_numbers : a vector indicating the cell number associated to each data element +#' - iterations : an integer indicating the number of iterations performed +#' - record : a list containing all the centroids computed through the iterations of the best start +#' - all_errors : a vector indicating the quantization error of each start +#' - all_starts : a list indicating all the best prototypes obtained for each start. Provided only if all_starts = TRUE. 
+#' +#' @export +#' @import abind +#' @examples +#' set.seed(20) +#' data = array(runif(9*20)*15, dim = c(3,3,20)) +#' distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))} +#' find_prototypes(nb_cells = 3, data = data, +#' multistart = 2, distance_func = distance_func) + +find_prototypes = function(starting_proto = NULL, nb_cells = NULL, data, multistart = 1, method_IS = "unique", density_ratio = rep(1, dim(data)[length(dim(data))]), budget = 10^3, threshold = 0, distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))},print_progress = FALSE, trace = FALSE, all_starts = FALSE,bias = NULL, index_sampling_error = NULL, seed = NULL, batch = FALSE){ + if(is.null(dim(data))){data = t(as.matrix(data))} + if(!is.null(seed)){set.seed(seed)} + best_error = +Inf + best_list = list() + all_errors = c() + all_proto = list() + for(it in 1:multistart){ + if(print_progress){print(paste("start number", it))} + if(is.null(starting_proto)){ + are_distinct = FALSE + while(are_distinct == FALSE){ + prototypes_it = lapply(sample(x = 1:dim(data)[length(dim(data))], size = nb_cells), function(i){asub(data,dims = length(dim(data)), idx = i)}) + are_distinct = distinct_prototypes(prototypes_it) + } + } + else{ + if(multistart == 1){prototypes_it = starting_proto} + else{prototypes_it = starting_proto[[it]]} + } + prototypes_it = oned_to_matrix(prototypes_it) + prototypes_it = sort_prototypes(prototypes_it) + if(is.null(bias)){bias = rep(0, length(prototypes_it))} + record = NULL + if(trace){record = list(prototypes_it)} + for (i in 1:budget){ + if(print_progress){print(paste("Iteration number", i))} + if(method_IS == "unique"){ + if(batch){cell_numbers = lapply(data,function(x){get_cell_numbers(x,prototypes_it,distance_func = distance_func)})} + else{cell_numbers = get_cell_numbers(data,prototypes_it,distance_func = distance_func)} + if(length(table(unlist(cell_numbers))) 0)*outputs_pred[[i]]} - probas_pred_cv = get_probas(density_ratio = density_ratio, outputs = outputs_pred[[i]], gamma = gamma, distance_func = distance_func, cells = 1:length(gamma), bias = bias) + probas_pred_cv = get_probas(density_ratio = density_ratio, data = outputs_pred[[i]], prototypes = prototypes, distance_func = distance_func, cells = 1:length(prototypes), bias = bias) probas_pred_df = rbind(probas_pred_df,probas_pred_cv) relative_error_df = rbind(relative_error_df, abs(probas_pred_cv - probas_true)/probas_true) if(return_pred == FALSE){outputs_pred = list()} diff --git a/R/rf_proba_training_test.R b/R/rf_proba_training_test.R index 57497ec..7321046 100644 --- a/R/rf_proba_training_test.R +++ b/R/rf_proba_training_test.R @@ -14,7 +14,7 @@ #' @param seed An optional random seed #' @param ... other parameters of \code{\link{randomForest}} function from \code{randomForest}. #' @param density_ratio density_ratio indicates the weight fX/g of each output -#' @param gamma A set of l prototypes defining the Voronoï cells +#' @param prototypes A set of l prototypes defining the Voronoï cells #' @param distance_func A function computing a distance between two elements in the output spaces. #' @param return_pred A boolean indicating whether the predicted outputs should be returned or not #' @param only_positive A boolean indicating whether the predicted outputs should only contained positive values or not. Default is FALSE. 
@@ -85,23 +85,26 @@ #' list_search = list("nodesize" = as.list(df_search[,2]), "classwt" = lapply(1:nrow(df_search), #' function(i){c(df_search[i,1], 1-df_search[i,1])})) #' density_ratio = rep(1,50) -#' gamma = lapply(c(2,3,51,7), function(i){outputs[,,i]}) +#' prototypes = lapply(c(2,3,51,7), function(i){outputs[,,i]}) #' distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))} -#' list_rf_prob_train_test = rf_probas_training_test(design_train = design_train, +#' list_rf_prob_train_test = rf_probas_training_test(design_train = +#' design_train, #' design_test = design_test, outputs_train = outputs_train, -#' outputs_test = outputs_test, threshold_classification = 2, threshold_fpca = 0, list_search = list_search, -#' density_ratio = density_ratio, gamma = gamma, distance_func= distance_func, +#' outputs_test = outputs_test, threshold_classification = 2, +#' threshold_fpca = 0, list_search = list_search, +#' density_ratio = density_ratio, prototypes = prototypes, +#' distance_func= distance_func, #' ncoeff = 400, npc = 6, control = list(trace = FALSE)) -rf_probas_training_test = function(design_train, design_test, outputs_train, outputs_test,threshold_classification, threshold_fpca = NULL, list_search, density_ratio, gamma, distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))},return_pred = FALSE, only_positive = FALSE, seed = NULL, ncoeff,npc, formula = ~1, covtype="matern5_2", wf = "d4", boundary = "periodic",J=1, +rf_probas_training_test = function(design_train, design_test, outputs_train, outputs_test,threshold_classification, threshold_fpca = NULL, list_search, density_ratio, prototypes, distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))},return_pred = FALSE, only_positive = FALSE, seed = NULL, ncoeff,npc, formula = ~1, covtype="matern5_2", wf = "d4", boundary = "periodic",J=1, coef.trend = NULL, coef.cov = NULL, coef.var = NULL, nugget = NULL, noise.var=NULL, lower = NULL, upper = NULL, parinit = NULL, multistart=1, kernel=NULL,control = NULL,type = "UK",bias = NULL,...){ if(is.null(seed)==FALSE){set.seed(seed)} if(is.null(threshold_fpca)){threshold_fpca = threshold_classification} - probas_true = get_probas(density_ratio = density_ratio, outputs = outputs_test, gamma = gamma, distance_func = distance_func, cells = 1:length(gamma), bias = bias) + probas_true = get_probas(density_ratio = density_ratio, data = outputs_test, prototypes = prototypes, distance_func = distance_func, cells = 1:length(prototypes), bias = bias) probas_pred_df = data.frame() relative_error_df = data.frame() sum_depth = Vectorize(function(it){sum(asub(x = outputs_train, idx = it, dims = length(dim(outputs_train)), drop = "selected"))})(1:dim(outputs_train)[length(dim(outputs_train))]) @@ -136,7 +139,7 @@ rf_probas_training_test = function(design_train, design_test, outputs_train, out afill(outputs_pred[[i]]) = outputs_pred_draft } if(only_positive){outputs_pred[[i]] = (outputs_pred[[i]] > 0)*outputs_pred[[i]]} - probas_pred_cv = get_probas(density_ratio = density_ratio, outputs = outputs_pred[[i]], gamma = gamma, distance_func = distance_func, cells = 1:length(gamma), bias = bias) + probas_pred_cv = get_probas(density_ratio = density_ratio, data = outputs_pred[[i]], prototypes = prototypes, distance_func = distance_func, cells = 1:length(prototypes), bias = bias) probas_pred_df = rbind(probas_pred_df,probas_pred_cv) relative_error = abs(probas_pred_cv - probas_true)/probas_true relative_error[probas_pred_cv == 0 & probas_true == 0] = 0 diff --git a/R/rf_rmse_k_fold.R b/R/rf_rmse_k_fold.R 
index 41c66c5..ef99315 100644 --- a/R/rf_rmse_k_fold.R +++ b/R/rf_rmse_k_fold.R @@ -71,7 +71,8 @@ #' outputs = func2D(design) #' list_search = list("nodesize" = as.list(c(1,3,5,7,9,11))) #' list_rf_rmse_k_fold = rf_rmse_k_fold(design = design,outputs = outputs, -#' threshold_classification = 2, threshold_fpca = 0, list_search = list_search, nb_folds = 10, ncoeff = 400, +#' threshold_classification = 2, threshold_fpca = 0, +#' list_search = list_search, nb_folds = 10, ncoeff = 400, #' npc = 6, control = list(trace = FALSE)) rf_rmse_k_fold = function(design, outputs, threshold_classification, threshold_fpca = NULL, list_search, nb_folds, return_pred = FALSE, only_positive = FALSE, seed = NULL, ncoeff,npc, formula = ~1, covtype="matern5_2", wf = "d4", boundary = "periodic",J=1, diff --git a/R/rf_rmse_training_test.R b/R/rf_rmse_training_test.R index 06e4075..1cd7ddf 100644 --- a/R/rf_rmse_training_test.R +++ b/R/rf_rmse_training_test.R @@ -81,9 +81,12 @@ #' "classwt" = lapply(1:nrow(df_search), function(i){c( #' df_search[i,1], 1-df_search[i,1])})) -#' list_rf_rmse_train_test = rf_rmse_training_test(design_train = design_train, -#' design_test = design_test, outputs_train = outputs_train, outputs_test = -#' outputs_test, threshold_classification = 2, threshold_fpca = 0, list_search = list_search, ncoeff = 400, +#' list_rf_rmse_train_test = rf_rmse_training_test(design_train = +#' design_train, +#' design_test = design_test, outputs_train = outputs_train, +#' outputs_test = +#' outputs_test, threshold_classification = 2, +#' threshold_fpca = 0, list_search = list_search, ncoeff = 400, #' npc = 6, control = list(trace = FALSE)) rf_rmse_training_test = function(design_train, design_test, outputs_train, outputs_test,threshold_classification, threshold_fpca = NULL, list_search,return_pred = FALSE, only_positive = FALSE, seed = NULL, ncoeff,npc, formula = ~1, covtype="matern5_2", wf = "d4", boundary = "periodic",J=1, diff --git a/R/sort_gamma.R b/R/sort_gamma.R deleted file mode 100644 index 82e76f7..0000000 --- a/R/sort_gamma.R +++ /dev/null @@ -1,16 +0,0 @@ -#' @title Sorting the prototypes by increasing sum of their elements (absolute value) -#' -#' @param gamma A set of prototypes -#' -#' @return The same set of prototypes but sorted by increasing sum of their elements (absolute value) -#' @export -#' -#' @examples -#' -#' sort_gamma(gamma = list(array(10, dim = c(3,3)), array(5, dim = c(3,3)), array(6, dim = c(3,3)))) -sort_gamma = function(gamma){ - gamma_sorted = gamma - sums = Vectorize(function(k){sum(gamma[[k]])})(1:length(gamma)) - for(k in 1:length(gamma)){gamma_sorted[[rank(sums)[k]]] = gamma[[k]]} - return(gamma_sorted) -} diff --git a/R/sort_prototypes.R b/R/sort_prototypes.R new file mode 100644 index 0000000..c5a91a3 --- /dev/null +++ b/R/sort_prototypes.R @@ -0,0 +1,17 @@ +#' @title Sorting the prototypes by increasing sum of their elements (absolute value) +#' +#' @param prototypes A set of prototypes +#' +#' @return The same set of prototypes but sorted by increasing sum of their elements (absolute value) +#' @export +#' +#' @examples +#' +#' sort_prototypes(prototypes = list(array(10, dim = c(3,3)), +#' array(5, dim = c(3,3)), array(6, dim = c(3,3)))) +sort_prototypes = function(prototypes){ + prototypes_sorted = prototypes + sums = Vectorize(function(k){sum(prototypes[[k]])})(1:length(prototypes)) + for(k in 1:length(prototypes)){prototypes_sorted[[rank(sums)[k]]] = prototypes[[k]]} + return(prototypes_sorted) +} diff --git a/R/std_centroid.R b/R/std_centroid.R index 
174743a..2e65b7c 100644 --- a/R/std_centroid.R +++ b/R/std_centroid.R @@ -1,12 +1,12 @@ #' @title Computation of the IS centroid standard deviation for different sets of prototypes. #' -#' @param outputs The output samples that need to be quantized. Useful only if cell_numbers == NULL. -#' @param gamma_list A list of gamma on which we want to evaluate the IS centroid standard deviation. Each gamma is a set of prototypes. -#' @param density_ratio density_ratio indicates the weight fX/g of each output. -#' @param distance_func A function computing a distance between two elements in the output spaces. +#' @param data The data samples that need to be quantized. Useful only if cell_numbers == NULL. +#' @param prototypes_list A list of set of prototypes on which we want to evaluate the IS centroid standard deviation. Each element is a list of prototypes. +#' @param density_ratio density_ratio indicates the weight fX/g of each data element. +#' @param distance_func A function computing a distance between two data elements. #' @param cells The Voronoï cell numbers that we are investigating. #' @param nv The size of the sample for which we want to estimate the IS centroid standard deviation. -#' @param cell_numbers An optional list providing for each set of prototypes the voronoi cell number of every output. +#' @param cell_numbers An optional list providing for each set of prototypes the voronoi cell number of every data element. #' @return A list providing for each set of prototypes a list the IS centroid standard deviation for each voronoi cell #' @export #' @import abind @@ -21,21 +21,21 @@ #' return(Ymaps) #' } #' design = data.frame(X = seq(-1,1,l= 50)) -#' outputs = func2D(design) -#' gamma_list = list(lapply(c(1,3,10,14,18), function(i){outputs[,,i]})) +#' data = func2D(design) +#' prototypes_list = list(lapply(c(1,3,10,14,18), function(i){data[,,i]})) #' density_ratio = rep(1, 50) #' distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))} -#' list_std_centroid = std_centroid(outputs = outputs, gamma_list = -#' gamma_list, density_ratio = density_ratio, distance_func = distance_func -#' , cells = 1:length(gamma_list[[1]]), nv = 50) -std_centroid = function(outputs, gamma_list, density_ratio, distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))},cells, cell_numbers = NULL, nv){ +#' list_std_centroid = std_centroid(data = data, prototypes_list = +#' prototypes_list, density_ratio = density_ratio, distance_func = distance_func +#' , cells = 1:length(prototypes_list[[1]]), nv = 50) +std_centroid = function(data, prototypes_list, density_ratio, distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))},cells, cell_numbers = NULL, nv){ - weighted_map = t(matrix(outputs, nrow = prod(dim(outputs)[-length(dim(outputs))]),ncol = dim(outputs)[length(dim(outputs))]))*density_ratio + weighted_map = t(matrix(data, nrow = prod(dim(data)[-length(dim(data))]),ncol = dim(data)[length(dim(data))]))*density_ratio std_ratio_list = list() - for(it in 1:length(gamma_list)){ - gamma = gamma_list[[it]]#for all Gamma + for(it in 1:length(prototypes_list)){ + prototypes = prototypes_list[[it]] std_ratio_list[[it]] = as.list(rep(0,length(cells))) - if(is.null(cell_numbers)){cell_numbers_it = get_cell_numbers(outputs = outputs, gamma = gamma, distance_func = distance_func)} + if(is.null(cell_numbers)){cell_numbers_it = get_cell_numbers(data = data, prototypes = prototypes, distance_func = distance_func)} else{cell_numbers_it = cell_numbers[[it]]} for(j in cells){#for all voronoi cells map_loop = 
weighted_map #weighted map is the set of maps multiplied by the weights f_{x}/mu diff --git a/R/std_proba.R b/R/std_proba.R index 0101334..c339d65 100644 --- a/R/std_proba.R +++ b/R/std_proba.R @@ -1,12 +1,12 @@ #' @title Computation of the IS coefficients of variation of the membership probability for different set of prototypes. #' -#' @param outputs The output samples that need to be quantized. Useful only if cell_numbers == NULL. -#' @param gamma_list A list of gamma on which we want to evaluate the IS coefficient of variation of the membership probability. Each gamma is a set of prototypes -#' @param density_ratio density_ratio indicates the weight fX/g of each output -#' @param distance_func A function computing a distance between two elements in the output spaces. +#' @param data The data that needs to be quantized. Useful only if cell_numbers == NULL. +#' @param prototypes_list A list of sets of prototypes on which we want to evaluate the IS coefficient of variation of the membership probability. Each element is a list of prototypes. +#' @param density_ratio density_ratio indicates the weight fX/g of each data element. +#' @param distance_func A function computing a distance between two data elements. #' @param cells The Voronoï cell numbers that we are investigating #' @param nv The size of the sample for which we want to estimate the IS coefficient of variation of the membership probability -#' @param cell_numbers An optional list providing for each set of prototypes the voronoi cell number of every output. +#' @param cell_numbers An optional list providing for each set of prototypes the voronoi cell number of every data element. #' #' @return A list of IS coefficients of variation of the membership probability obtained for each set of prototypes. #' @export @@ -22,22 +22,22 @@ #' return(Ymaps) #' } #' design = data.frame(X = seq(-1,1,l= 50)) -#' outputs = func2D(design) -#' gamma_list = list(lapply(c(1,3,10,14,18), function(i){outputs[,,i]})) +#' data = func2D(design) +#' prototypes_list = list(lapply(c(1,3,10,14,18), function(i){data[,,i]})) #' density_ratio = rep(1, 50) #' distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))} -#' list_std_proba = std_proba(outputs = outputs, gamma_list = gamma_list, +#' list_std_proba = std_proba(data = data, prototypes_list = prototypes_list, #' density_ratio = density_ratio, distance_func = distance_func, -#' cells = 1:length(gamma_list[[1]]), nv = 50) -std_proba = function(outputs, gamma_list, density_ratio, distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))},cells, cell_numbers = NULL, nv){ +#' cells = 1:length(prototypes_list[[1]]), nv = 50) +std_proba = function(data, prototypes_list, density_ratio, distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))},cells, cell_numbers = NULL, nv){ std_list = list() - for(it in 1:length(gamma_list)){ #for all Gamma in (Gamma^r) + for(it in 1:length(prototypes_list)){ - if(is.null(cell_numbers)){cell_numbers_it = get_cell_numbers(outputs = outputs, gamma = gamma_list[[it]], distance_func = distance_func)} + if(is.null(cell_numbers)){cell_numbers_it = get_cell_numbers(data = data, prototypes = prototypes_list[[it]], distance_func = distance_func)} else{cell_numbers_it = cell_numbers[[it]]} df_for_std = Vectorize(function(i){density_ratio*(cell_numbers_it == i)})(cells) - std_list[[it]] = apply(df_for_std, 2, function(x){sqrt(var(x))})/apply(df_for_std,2,mean)/sqrt(nv) #for all voronoi cells of Gamma, we compute the relative standard error + std_list[[it]] = apply(df_for_std, 2, 
function(x){sqrt(var(x))})/apply(df_for_std,2,mean)/sqrt(nv) #for all voronoi cells, we compute the relative standard error } return(std_list) } diff --git a/man/compute_centroids_and_proba.Rd b/man/compute_centroids_and_proba.Rd index a2e61d9..ac3d639 100644 --- a/man/compute_centroids_and_proba.Rd +++ b/man/compute_centroids_and_proba.Rd @@ -5,23 +5,26 @@ \title{Compute the centroid and the probability mass of the Voronoï cells} \usage{ compute_centroids_and_proba( - outputs, + data, cell_numbers, method_IS = "unique", - density_ratio = rep(1, dim(outputs)[length(dim(outputs))]), - bias = rep(0, length(unique(unlist(cell_numbers)))) + density_ratio = rep(1, dim(data)[length(dim(data))]), + bias = rep(0, length(unique(unlist(cell_numbers)))), + batch = FALSE ) } \arguments{ -\item{outputs}{The output samples that need to be quantized. If method = "percell", a list of output samples must be provided, of length equal to the number of Voronoï cells.} +\item{data}{The data that needs to be quantized. If method = "percell", a list of data samples must be provided, of length equal to the number of Voronoï cells.} -\item{cell_numbers}{The voronoi cell number of every output} +\item{cell_numbers}{The voronoi cell number of every data element} \item{method_IS}{The method of Importance Sampling : "unique" means there is a unique biased density involved, "percell" means there is one biased density (and then one biased sample) for each cell.} -\item{density_ratio}{A vector indicating the weight fX/g of each output. Default is a vector of 1. If method = "percell", a list of density_ratio must be provided, of length equal to the number of Voronoï cells.} +\item{density_ratio}{A vector indicating the weight fX/g of each data element. Default is a vector of 1. If method = "percell", a list of density_ratio must be provided, of length equal to the number of Voronoï cells.} \item{bias}{A vector indicating the bias that came out when computing the importance sampling estimators of the membership probabilities. Each element of the vector is associated to a Voronoi cell. Default is 0 for all Voronoi cells.} + +\item{batch}{A boolean indicating whether the computations must be performed by batch or not. If TRUE, data, cell_numbers and density_ratio must be lists. 
Default is False.} } \value{ The centroid and the probability mass of each probability cell @@ -29,10 +32,3 @@ The centroid and the probability mass of each probability cell \description{ Compute the centroid and the probability mass of the Voronoï cells } -\examples{ -outputs = array(runif(9*20)*15, dim = c(3,3,20)) -cell_numbers = c(1,3,2,1,2,1,1,2,3,3,2,2,2,2,2,3,1,1,3,3) -density_ratio = rep(1,20) -compute_centroids_and_proba(outputs = outputs,cell_numbers = cell_numbers, -density_ratio = density_ratio) -} diff --git a/man/create_models_tuning.Rd b/man/create_models_tuning.Rd index 68aa8c8..a88e56b 100644 --- a/man/create_models_tuning.Rd +++ b/man/create_models_tuning.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/create_model_tuning.R +% Please edit documentation in R/create_models_tuning.R \name{create_models_tuning} \alias{create_models_tuning} \title{Computation of GP models in the PCA space for different ncoeff values} diff --git a/man/distance_to_gamma.Rd b/man/distance_to_gamma.Rd deleted file mode 100644 index d9d1c96..0000000 --- a/man/distance_to_gamma.Rd +++ /dev/null @@ -1,32 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/distance2gamma.R -\name{distance_to_gamma} -\alias{distance_to_gamma} -\title{Compute the distance between a point and its nearest centroid, returning this distance and the associated cell number} -\usage{ -distance_to_gamma( - x, - gamma, - distance_func = function(A1, A2) { - return(sqrt(sum((A1 - A2)^2))) - } -) -} -\arguments{ -\item{x}{A point in the output space} - -\item{gamma}{A set of prototypes} - -\item{distance_func}{A function computing a distance between two elements in the output spaces} -} -\value{ -The distance between a point and its nearest centroid -} -\description{ -Compute the distance between a point and its nearest centroid, returning this distance and the associated cell number -} -\examples{ -distance_to_gamma(array(1:9, dim = c(3,3)), list(array(10, dim = c(3,3)), -array(5, dim = c(3,3)), array(6, dim = c(3,3))), -distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))}) -} diff --git a/man/distance_to_prototypes.Rd b/man/distance_to_prototypes.Rd new file mode 100644 index 0000000..f8ac490 --- /dev/null +++ b/man/distance_to_prototypes.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/distance_to_prototypes.R +\name{distance_to_prototypes} +\alias{distance_to_prototypes} +\title{Compute the distance between a point and its nearest prototype, returning this distance and the associated cell number} +\usage{ +distance_to_prototypes( + x, + prototypes, + distance_func = function(A1, A2) { + return(sqrt(sum((A1 - A2)^2))) + } +) +} +\arguments{ +\item{x}{A point in the space of the data elements} + +\item{prototypes}{A set of prototypes} + +\item{distance_func}{A function computing a distance between two data elements} +} +\value{ +The distance between a point and its nearest centroid +} +\description{ +Compute the distance between a point and its nearest prototype, returning this distance and the associated cell number +} +\examples{ +distance_to_prototypes(array(1:9, dim = c(3,3)), list(array(10, dim = c(3,3)), +array(5, dim = c(3,3)), array(6, dim = c(3,3))), +distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))}) +} diff --git a/man/distinct_gamma.Rd b/man/distinct_gamma.Rd deleted file mode 100644 index c1e668b..0000000 --- a/man/distinct_gamma.Rd +++ /dev/null @@ -1,20 +0,0 
@@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/distinct_gamma.R -\name{distinct_gamma} -\alias{distinct_gamma} -\title{Check that all the elements in gamma are distinct} -\usage{ -distinct_gamma(gamma) -} -\arguments{ -\item{gamma}{A list of prototypes} -} -\value{ -A boolean indicating whether the elements in gamma are distinct -} -\description{ -Check that all the elements in gamma are distinct -} -\examples{ -distinct_gamma(list(1,2,34,1)) -} diff --git a/man/distinct_prototypes.Rd b/man/distinct_prototypes.Rd new file mode 100644 index 0000000..177bca2 --- /dev/null +++ b/man/distinct_prototypes.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/distinct_prototypes.R +\name{distinct_prototypes} +\alias{distinct_prototypes} +\title{Check that all the elements in a list of prototypes are distinct} +\usage{ +distinct_prototypes(prototypes) +} +\arguments{ +\item{prototypes}{A list of prototypes} +} +\value{ +A boolean indicating whether the elements in prototypes are distinct +} +\description{ +Check that all the elements in a list of prototypes are distinct +} +\examples{ +distinct_prototypes(list(1,2,34,1)) +} diff --git a/man/estim_denom_centroid.Rd b/man/estim_denom_centroid.Rd index 430eaed..96664c4 100644 --- a/man/estim_denom_centroid.Rd +++ b/man/estim_denom_centroid.Rd @@ -7,9 +7,9 @@ estim_denom_centroid(density_ratio, cell_numbers, cell, bias = 0) } \arguments{ -\item{density_ratio}{density_ratio indicates the weight fX/g of each output} +\item{density_ratio}{density_ratio indicates the weight fX/g of each data element} -\item{cell_numbers}{The output samples that need to be quantized} +\item{cell_numbers}{The voronoi cell number of every data element.} \item{cell}{The cell number of the computed centroid} diff --git a/man/estim_num_centroid.Rd b/man/estim_num_centroid.Rd index 6508274..14702f9 100644 --- a/man/estim_num_centroid.Rd +++ b/man/estim_num_centroid.Rd @@ -4,28 +4,30 @@ \alias{estim_num_centroid} \title{Compute the estimator which is the numerator of the centroid estimation} \usage{ -estim_num_centroid(outputs, cell_numbers, density_ratio, cell) +estim_num_centroid(data, cell_numbers, density_ratio, cell, batch = FALSE) } \arguments{ -\item{outputs}{The output samples that need to be quantized} +\item{data}{The data that needs to be quantized} -\item{cell_numbers}{The voronoi cell number of every output} +\item{cell_numbers}{The voronoi cell number of every data element} -\item{density_ratio}{density_ratio indicates the weight fX/g of each output} +\item{density_ratio}{density_ratio indicates the weight fX/g of each data element} \item{cell}{The cell number of the computed centroid} + +\item{batch}{A boolean indicating whether the computations must be performed by batch or not. If TRUE, data, cell_numbers and density_ratio must be lists. 
Default is False.} } \value{ -An array having the same dimension as an output, which is the numerator of the centroid estimator +An array having the same dimension as an data element, which is the numerator of the centroid estimator } \description{ Compute the estimator which is the numerator of the centroid estimation } \examples{ -outputs = array(runif(9*20)*15, dim = c(3,3,20)) +data = array(runif(9*20)*15, dim = c(3,3,20)) cell_numbers = c(1,3,2,1,2,1,1,2,3,3,2,2,2,2,2,3,1,1,3,3) density_ratio = rep(1,20) cell = 3 -estim_num_centroid(outputs = outputs,cell_numbers = cell_numbers, +estim_num_centroid(data = data,cell_numbers = cell_numbers, density_ratio = density_ratio, cell = cell) } diff --git a/man/proto_map_algo.Rd b/man/find_prototypes.Rd similarity index 58% rename from man/proto_map_algo.Rd rename to man/find_prototypes.Rd index f3f283b..28a40ac 100644 --- a/man/proto_map_algo.Rd +++ b/man/find_prototypes.Rd @@ -1,16 +1,16 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/proto_map_algo.R -\name{proto_map_algo} -\alias{proto_map_algo} -\title{The prototype maps algorithm with multistart. Providing prototype outputs and their probability masses.} +% Please edit documentation in R/find_prototypes.R +\name{find_prototypes} +\alias{find_prototypes} +\title{The algorithm to find prototypes with multistart. Providing prototype data and their probability masses.} \usage{ -proto_map_algo( - gamma = NULL, +find_prototypes( + starting_proto = NULL, nb_cells = NULL, - outputs, + data, multistart = 1, method_IS = "unique", - density_ratio = rep(1, dim(outputs)[length(dim(outputs))]), + density_ratio = rep(1, dim(data)[length(dim(data))]), budget = 10^3, threshold = 0, distance_func = function(A1, A2) { @@ -21,27 +21,28 @@ proto_map_algo( all_starts = FALSE, bias = NULL, index_sampling_error = NULL, - seed = NULL + seed = NULL, + batch = FALSE ) } \arguments{ -\item{gamma}{Optional. If multistart = 1, gamma is a list of initial prototypes. Else, gamma is a list of list of prototypes, which length is equal to multistart.} +\item{starting_proto}{Optional. If multistart = 1, starting_proto is a list of initial prototypes. Else, starting_proto is a list of list of prototypes, which length is equal to multistart.} -\item{nb_cells}{Required only if gamma is NULL. Indicates the number of prototypes of the quantization.} +\item{nb_cells}{Required only if starting_proto is NULL. Indicates the number of prototypes of the quantization.} -\item{outputs}{The output samples that need to be quantized. If method = "percell", a list of output samples must be provided, of length equal to the number of Voronoï cells.} +\item{data}{The data that needs to be quantized. If method = "percell", a list of data samples must be provided, of length equal to the number of Voronoï cells.} \item{multistart}{Number of starts of the algorithm} \item{method_IS}{The method of Importance Sampling : "unique" means there is a unique biased density involved, "percell" means there is one biased density (and then one biased sample) for each cell. Default is "unique".} -\item{density_ratio}{A vector indicating the weight fX/g of each output. Default is a vector of 1. If method = "percell", a list of density_ratio must be provided, of length equal to the number of Voronoï cells.} +\item{density_ratio}{A vector indicating the weight fX/g of each data element. Default is a vector of 1. 
If method = "percell", a list of density_ratio must be provided, of length equal to the number of Voronoï cells.} \item{budget}{The maximum number of iterations of the algorithm. Default is 10^3.} \item{threshold}{A real positive number. When the distance between the new centroids and the previous ones is lower than this value, then we stop the algorithm.} -\item{distance_func}{A function computing a distance between two elements in the output spaces.} +\item{distance_func}{A function computing a distance between two data elements.} \item{print_progress}{A boolean indicating whether to print the progress through the start numbers. Default is FALSE.} @@ -51,27 +52,29 @@ proto_map_algo( \item{bias}{A vector indicating the bias that came out when computing the importance sampling estimators of the membership probabilities. Each element of the vector is associated to a Voronoi cell. Default is 0 for all Voronoi cells.} -\item{index_sampling_error}{Required only if method_IS = "percell". Indicates which of the outputs samples must be used for the computation of the quantization error.} +\item{index_sampling_error}{Required only if method_IS = "percell". Indicates which of the data samples must be used for the computation of the quantization error.} + +\item{seed}{An optional random seed.} + +\item{batch}{A boolean indicating whether the computations must be performed by batch or not. If TRUE, data, cell_numbers and density_ratio must be lists. Default is False.} } \value{ A list containing : -- gamma : the list of optimal prototypes +- prototypes : the list of optimal prototypes - probas : a vector indicating the probability mass of the prototypes -- cell_numbers : a vector indicating the cell number associated to each output +- cell_numbers : a vector indicating the cell number associated to each data element - iterations : an integer indicating the number of iterations performed - record : a list containing all the centroids computed through the iterations of the best start - all_errors : a vector indicating the quantization error of each start - all_starts : a list indicating all the best prototypes obtained for each start. Provided only if all_start = TRUE. } \description{ -The prototype maps algorithm with multistart. Providing prototype outputs and their probability masses. +The algorithm to find prototypes with multistart. Providing prototype data and their probability masses. } \examples{ set.seed(20) -gamma_list = list() -gamma_list[[1]] = list(array(10, dim = c(3,3)), array(5, dim = c(3,3)), array(6, dim = c(3,3))) -gamma_list[[2]] = list(array(1:9, dim = c(3,3)), array(5:13, dim = c(3,3)), array(7:15, dim = c(3,3))) -outputs = array(runif(9*20)*15, dim = c(3,3,20)) +data = array(runif(9*20)*15, dim = c(3,3,20)) distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))} -proto_map_algo(gamma_list = gamma, outputs = outputs, distance_func = distance_func) +find_prototypes(nb_cells = 3, data = data, +multistart = 2, distance_func = distance_func) } diff --git a/man/fit_metamodel.Rd b/man/fit_metamodel.Rd index 829967e..1617cde 100644 --- a/man/fit_metamodel.Rd +++ b/man/fit_metamodel.Rd @@ -97,8 +97,9 @@ for each principal component. At this stage, the parameters must be provided as \item{threshold_classification}{The threshold that creates the two classes of maps for the classification} -\item{threshold_fpca}{The threshold used for the training of the FPCA. Only the maps for which the sum of the pixel is above this threshold are used for the training. 
If NULL, this threshold takes the value of threshold_classification. -#' @param ... other parameters of \code{\link{km}} function from \code{DiceKriging}.} +\item{threshold_fpca}{The threshold used for the training of the FPCA. Only the maps for which the sum of the pixel is above this threshold are used for the training. If NULL, this threshold takes the value of threshold_classification.} + +\item{...}{other parameters of \code{\link{km}} function from \code{DiceKriging}.} } \value{ An list containing : @@ -123,7 +124,7 @@ return(Ymaps) library(randtoolbox) design = as.data.frame(sobol(250,2))*2-1 outputs = func2D(design) -fit_metamodel = fit_metamodel(design_train = design, outputs_train = outputs_train, +fit_metamodel = fit_metamodel(design_train = design, outputs_train = outputs, ncoeff = 400, npc = 6, control = list(trace = FALSE), classification = TRUE, control_classification = list(nodesize = 4), threshold_classification = 2) } diff --git a/man/get_cell_numbers.Rd b/man/get_cell_numbers.Rd index d47ca4e..d8f81e9 100644 --- a/man/get_cell_numbers.Rd +++ b/man/get_cell_numbers.Rd @@ -5,19 +5,19 @@ \title{Provide the Voronoï cell number associated to each sample} \usage{ get_cell_numbers( - outputs, - gamma, + data, + prototypes, distance_func = function(A1, A2) { return(sqrt(sum((A1 - A2)^2))) } ) } \arguments{ -\item{outputs}{The output samples that need to be quantized. Useful only if cell_numbers == NULL.} +\item{data}{The data that needs to be quantized. Useful only if cell_numbers == NULL.} -\item{gamma}{A set of prototypes. Useful only if cell_numbers == NULL.} +\item{prototypes}{A set of prototypes. Useful only if cell_numbers == NULL.} -\item{distance_func}{A function computing a distance between two elements in the output spaces. Useful only if cell_numbers == NULL.} +\item{distance_func}{A function computing a distance between two data elements. Useful only if cell_numbers == NULL.} } \value{ A vector providing the Voronoï cell number associated to each sample. @@ -26,8 +26,8 @@ A vector providing the Voronoï cell number associated to each sample. Provide the Voronoï cell number associated to each sample } \examples{ -gamma = list(array(10, dim = c(3,3)), array(5, dim = c(3,3)), array(6, dim = c(3,3))) -outputs = array(runif(9*20)*20, dim = c(3,3,20)) +prototypes = list(array(10, dim = c(3,3)), array(5, dim = c(3,3)), array(6, dim = c(3,3))) +data = array(runif(9*20)*20, dim = c(3,3,20)) distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))} -get_cell_numbers(outputs = outputs, gamma = gamma, distance_func = distance_func) +get_cell_numbers(data = data, prototypes = prototypes, distance_func = distance_func) } diff --git a/man/get_probas.Rd b/man/get_probas.Rd index 329a708..689f176 100644 --- a/man/get_probas.Rd +++ b/man/get_probas.Rd @@ -8,8 +8,8 @@ get_probas( density_ratio, method_IS = "unique", cell_numbers = NULL, - outputs = NULL, - gamma = NULL, + data = NULL, + prototypes = NULL, distance_func = function(A1, A2) { return(sqrt(sum((A1 - A2)^2))) }, @@ -18,17 +18,17 @@ get_probas( ) } \arguments{ -\item{density_ratio}{density_ratio indicates the weight fX/g of each output} +\item{density_ratio}{density_ratio indicates the weight fX/g of each data element} \item{method_IS}{The method of Importance Sampling : "unique" means there is a unique biased density involved, "percell" means there is one biased density (and then one biased sample) for each cell. Default is "unique".} -\item{cell_numbers}{The voronoi cell number of every output. 
If NULL, then outputs, gamma and distance_func must be provided.} +\item{cell_numbers}{The voronoi cell number of every data element. If NULL, then data, prototypes and distance_func must be provided.} -\item{outputs}{The output samples that need to be quantized. Useful only if cell_numbers == NULL.} +\item{data}{The data that needs to be quantized. Useful only if cell_numbers == NULL.} -\item{gamma}{A set of prototypes. Useful only if cell_numbers == NULL. If NULL, "cells" must be provided.} +\item{prototypes}{A set of prototypes. Useful only if cell_numbers == NULL. If NULL, "cells" must be provided.} -\item{distance_func}{A function computing a distance between two elements in the output spaces. Useful only if cell_numbers == NULL.} +\item{distance_func}{A function computing a distance between two data elements. Useful only if cell_numbers == NULL.} \item{cells}{The cell numbers that are investigated} @@ -41,10 +41,10 @@ A vector providing the probability masses of each Voronoï cell. Computing the probability masses of each voronoi cells } \examples{ -gamma = list(array(10, dim = c(3,3)), array(5, dim = c(3,3)), array(6, dim = c(3,3))) -outputs = array(runif(9*20)*20, dim = c(3,3,20)) +prototypes = list(array(10, dim = c(3,3)), array(5, dim = c(3,3)), array(6, dim = c(3,3))) +data = array(runif(9*20)*20, dim = c(3,3,20)) distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))} density_ratio = rep(1,20) -get_probas(density_ratio = density_ratio, outputs = outputs, -gamma = gamma, distance_func = distance_func) +get_probas(density_ratio = density_ratio, data = data, +prototypes = prototypes, distance_func = distance_func) } diff --git a/man/oned_to_matrix.Rd b/man/oned_to_matrix.Rd index 08eeefc..7d1d3ee 100644 --- a/man/oned_to_matrix.Rd +++ b/man/oned_to_matrix.Rd @@ -4,10 +4,10 @@ \alias{oned_to_matrix} \title{Transform every 1D prototype into a column matrix} \usage{ -oned_to_matrix(gamma) +oned_to_matrix(prototypes) } \arguments{ -\item{gamma}{a list of prototypes} +\item{prototypes}{a list of prototypes} } \value{ A list of matrices prototypes diff --git a/man/probas_k_fold.Rd b/man/probas_k_fold.Rd index d6116c1..dfaf891 100644 --- a/man/probas_k_fold.Rd +++ b/man/probas_k_fold.Rd @@ -8,7 +8,7 @@ probas_k_fold( outputs, nb_folds, density_ratio, - gamma, + prototypes, distance_func = function(A1, A2) { return(sqrt(sum((A1 - A2)^2))) }, @@ -34,7 +34,7 @@ probas_k_fold( control = NULL, type = "UK", seed = NULL, - bias = rep(0, length(gamma)), + bias = rep(0, length(prototypes)), ... 
) } @@ -45,7 +45,7 @@ probas_k_fold( \item{density_ratio}{density_ratio indicates the weight fX/g of each output} -\item{gamma}{A set of l prototypes defining the Voronoï cells} +\item{prototypes}{A set of l prototypes defining the Voronoï cells} \item{distance_func}{A function computing a distance between two elements in the output spaces.} @@ -131,11 +131,11 @@ return(Ymaps) } design = data.frame(X = seq(-1,1,l= 20)) outputs = func2D(design) -gamma = lapply(c(1,5,10,15,20), function(i){outputs[,,i]}) +prototypes = lapply(c(1,5,10,15,20), function(i){outputs[,,i]}) density_ratio = rep(1, 20) distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))} list_probas_k_fold = probas_k_fold(outputs = outputs, nb_folds = 5, -density_ratio = density_ratio, gamma = gamma, distance_func = distance_func +density_ratio = density_ratio, prototypes = prototypes, distance_func = distance_func , ncoeff_vec = c(50,100,200,400), npc_vec = 2:4, design = design, control = list(trace = FALSE)) } diff --git a/man/probas_loo.Rd b/man/probas_loo.Rd index 86d04c6..35ee4bf 100644 --- a/man/probas_loo.Rd +++ b/man/probas_loo.Rd @@ -7,7 +7,7 @@ probas_loo( outputs, density_ratio, - gamma, + prototypes, distance_func = function(A1, A2) { return(sqrt(sum((A1 - A2)^2))) }, @@ -42,7 +42,7 @@ probas_loo( \item{density_ratio}{density_ratio indicates the weight fX/g of each output} -\item{gamma}{A set of l prototypes defining the Voronoï cells} +\item{prototypes}{A set of l prototypes defining the Voronoï cells} \item{distance_func}{A function computing a distance between two elements in the output spaces.} @@ -127,10 +127,10 @@ return(Ymaps) } design = data.frame(X = seq(-1,1,l= 20)) outputs = func2D(design) -gamma = lapply(c(1,3,6,8,10,14,16,18), function(i){outputs[,,i]}) +prototypes = lapply(c(1,3,6,8,10,14,16,18), function(i){outputs[,,i]}) density_ratio = rep(1, 20) distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))} list_probas_loo = probas_loo(outputs = outputs, density_ratio = density_ratio, - gamma = gamma, distance_func = distance_func, ncoeff_vec = c(50,100,200,400), + prototypes = prototypes, distance_func = distance_func, ncoeff_vec = c(50,100,200,400), npc_vec = 2:4, design = design, control = list(trace = FALSE)) } diff --git a/man/probas_training_test.Rd b/man/probas_training_test.Rd index 49989f4..79d32c2 100644 --- a/man/probas_training_test.Rd +++ b/man/probas_training_test.Rd @@ -8,7 +8,7 @@ probas_training_test( outputs_train, outputs_test, density_ratio, - gamma, + prototypes, distance_func = function(A1, A2) { return(sqrt(sum((A1 - A2)^2))) }, @@ -35,7 +35,7 @@ probas_training_test( kernel = NULL, control = NULL, type = "UK", - bias = rep(0, length(gamma)), + bias = rep(0, length(prototypes)), ... 
) } @@ -46,7 +46,7 @@ probas_training_test( \item{density_ratio}{density_ratio indicates the weight fX/g of each output} -\item{gamma}{A set of l prototypes defining the Voronoï cells} +\item{prototypes}{A set of l prototypes defining the Voronoï cells} \item{distance_func}{A function computing a distance between two elements in the output spaces.} @@ -136,10 +136,10 @@ design_train = data.frame(X = seq(-1,1,l= 8)) outputs_train = func2D(design_train) design_test = data.frame(X = seq(-0.99,0.99,l=50)) outputs_test = func2D(design_test) -gamma = lapply(c(10,20,30,40,50), function(i){outputs_test[,,i]}) +prototypes = lapply(c(10,20,30,40,50), function(i){outputs_test[,,i]}) density_ratio = rep(1, 50) distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))} -list_probas_train_test = probas_training_test(gamma = gamma, +list_probas_train_test = probas_training_test(prototypes = prototypes, density_ratio = density_ratio, distance_func = distance_func, return_pred = TRUE, outputs_train = outputs_train, outputs_test = outputs_test, ncoeff_vec = c(50,100,200,400), npc_vec = 2:4, design_train = design_train, diff --git a/man/quanti_error.Rd b/man/quanti_error.Rd index 6f39f5a..b88c999 100644 --- a/man/quanti_error.Rd +++ b/man/quanti_error.Rd @@ -5,22 +5,25 @@ \title{Computation of the empirical quantization error} \usage{ quanti_error( - outputs, - gamma, + data, + prototypes, density_ratio, + batch = FALSE, distance_func = function(A1, A2) { return(sqrt(sum((A1 - A2)^2))) } ) } \arguments{ -\item{outputs}{The output samples that need to be quantized. Useful only if cell_numbers == NULL.} +\item{data}{The data that needs to be quantized. Useful only if cell_numbers == NULL.} -\item{gamma}{A set of prototypes. Useful only if cell_numbers == NULL.} +\item{prototypes}{A set of prototypes. Useful only if cell_numbers == NULL.} -\item{density_ratio}{density_ratio indicates the weight fX/g of each output} +\item{density_ratio}{density_ratio indicates the weight fX/g of each data element.} -\item{distance_func}{A function computing a distance between two elements in the output spaces. Useful only if cell_numbers == NULL.} +\item{batch}{A boolean indicating whether the computations must be performed by batch or not. If TRUE, data, cell_numbers and density_ratio must be lists. Default is FALSE.} + +\item{distance_func}{A function computing a distance between two data elements. 
Useful only if cell_numbers == NULL.} } \value{ An estimation of the quantization error } \description{ Computation of the empirical quantization error } \examples{ -gamma = list(array(10, dim = c(3,3)), array(5, dim = c(3,3)), array(6, dim = c(3,3))) -outputs = array(runif(9*20)*20, dim = c(3,3,20)) +prototypes = list(array(10, dim = c(3,3)), array(5, dim = c(3,3)), array(6, dim = c(3,3))) +data = array(runif(9*20)*20, dim = c(3,3,20)) density_ratio = rep(1,20) distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))} -quanti_error(outputs = outputs, gamma = gamma, density_ratio = density_ratio, +quanti_error(data = data, prototypes = prototypes, density_ratio = density_ratio, distance_func = distance_func) } diff --git a/man/rf_probas_k_fold.Rd b/man/rf_probas_k_fold.Rd index 0608e94..5310e24 100644 --- a/man/rf_probas_k_fold.Rd +++ b/man/rf_probas_k_fold.Rd @@ -12,7 +12,7 @@ rf_probas_k_fold( list_search, nb_folds, density_ratio, - gamma, + prototypes, distance_func = function(A1, A2) { return(sqrt(sum((A1 - A2)^2))) }, @@ -57,7 +57,7 @@ rf_probas_k_fold( \item{density_ratio}{density_ratio indicates the weight fX/g of each output} -\item{gamma}{A set of l prototypes defining the Voronoï cells} +\item{prototypes}{A set of l prototypes defining the Voronoï cells} \item{distance_func}{A function computing a distance between two elements in the output spaces.} @@ -145,10 +145,12 @@ design = as.data.frame(sobol(100,2))*2-1 outputs = func2D(design) list_search = list("nodesize" = as.list(c(1,3,5,7,9,11))) density_ratio = rep(1,100) -gamma = lapply(c(2,3,51,7), function(i){outputs[,,i]}) +prototypes = lapply(c(2,3,51,7), function(i){outputs[,,i]}) distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))} -rf_probas_k_fold(design = design,outputs = outputs, threshold_classification = 2, threshold_fpca = 0, - list_search = list_search, nb_folds = 10,density_ratio = - density_ratio, gamma = gamma, distance_func= distance_func, ncoeff = 400, - npc = 6, control = list(trace = FALSE)) +rf_probas_k_fold(design = design,outputs = outputs, +threshold_classification = 2, threshold_fpca = 0, +list_search = list_search, nb_folds = 10, +density_ratio = density_ratio, prototypes = prototypes, +distance_func= distance_func, ncoeff = 400, +npc = 6, control = list(trace = FALSE)) } diff --git a/man/rf_probas_training_test.Rd b/man/rf_probas_training_test.Rd index 263de79..1dbea8c 100644 --- a/man/rf_probas_training_test.Rd +++ b/man/rf_probas_training_test.Rd @@ -13,7 +13,7 @@ rf_probas_training_test( threshold_fpca = NULL, list_search, density_ratio, - gamma, + prototypes, distance_func = function(A1, A2) { return(sqrt(sum((A1 - A2)^2))) }, @@ -64,7 +64,7 @@ to the ith evaluation.} \item{density_ratio}{density_ratio indicates the weight fX/g of each output} -\item{gamma}{A set of l prototypes defining the Voronoï cells} +\item{prototypes}{A set of l prototypes defining the Voronoï cells} \item{distance_func}{A function computing a distance between two elements in the output spaces.} @@ -158,11 +158,14 @@ df_search = expand.grid(seq(0.1,1,0.3), c(1,5,9,13,17)) list_search = list("nodesize" = as.list(df_search[,2]), "classwt" = lapply(1:nrow(df_search), function(i){c(df_search[i,1], 1-df_search[i,1])})) density_ratio = rep(1,50) -gamma = lapply(c(2,3,51,7), function(i){outputs[,,i]}) +prototypes = lapply(c(2,3,51,7), function(i){outputs[,,i]}) distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))} -list_rf_prob_train_test = 
rf_probas_training_test(design_train = design_train, +list_rf_prob_train_test = rf_probas_training_test(design_train = +design_train, design_test = design_test, outputs_train = outputs_train, -outputs_test = outputs_test, threshold_classification = 2, threshold_fpca = 0, list_search = list_search, -density_ratio = density_ratio, gamma = gamma, distance_func= distance_func, +outputs_test = outputs_test, threshold_classification = 2, + threshold_fpca = 0, list_search = list_search, +density_ratio = density_ratio, prototypes = prototypes, +distance_func= distance_func, ncoeff = 400, npc = 6, control = list(trace = FALSE)) } diff --git a/man/rf_rmse_k_fold.Rd b/man/rf_rmse_k_fold.Rd index 1337723..ac1603b 100644 --- a/man/rf_rmse_k_fold.Rd +++ b/man/rf_rmse_k_fold.Rd @@ -130,6 +130,7 @@ design = as.data.frame(sobol(100,2))*2-1 outputs = func2D(design) list_search = list("nodesize" = as.list(c(1,3,5,7,9,11))) list_rf_rmse_k_fold = rf_rmse_k_fold(design = design,outputs = outputs, - threshold_classification = 2, threshold_fpca = 0, list_search = list_search, nb_folds = 10, ncoeff = 400, + threshold_classification = 2, threshold_fpca = 0, + list_search = list_search, nb_folds = 10, ncoeff = 400, npc = 6, control = list(trace = FALSE)) } diff --git a/man/rf_rmse_training_test.Rd b/man/rf_rmse_training_test.Rd index 58f85a0..a40a1fa 100644 --- a/man/rf_rmse_training_test.Rd +++ b/man/rf_rmse_training_test.Rd @@ -143,8 +143,11 @@ df_search = expand.grid(seq(0.1,1,0.3), c(1,5,9,13,17)) list_search = list("nodesize" = as.list(df_search[,2]), "classwt" = lapply(1:nrow(df_search), function(i){c( df_search[i,1], 1-df_search[i,1])})) -list_rf_rmse_train_test = rf_rmse_training_test(design_train = design_train, - design_test = design_test, outputs_train = outputs_train, outputs_test = - outputs_test, threshold_classification = 2, threshold_fpca = 0, list_search = list_search, ncoeff = 400, +list_rf_rmse_train_test = rf_rmse_training_test(design_train = +design_train, + design_test = design_test, outputs_train = outputs_train, + outputs_test = + outputs_test, threshold_classification = 2, + threshold_fpca = 0, list_search = list_search, ncoeff = 400, npc = 6, control = list(trace = FALSE)) } diff --git a/man/sort_gamma.Rd b/man/sort_prototypes.Rd similarity index 56% rename from man/sort_gamma.Rd rename to man/sort_prototypes.Rd index 34d822a..d65b473 100644 --- a/man/sort_gamma.Rd +++ b/man/sort_prototypes.Rd @@ -1,13 +1,13 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sort_gamma.R -\name{sort_gamma} -\alias{sort_gamma} +% Please edit documentation in R/sort_prototypes.R +\name{sort_prototypes} +\alias{sort_prototypes} \title{Sorting the prototypes by increasing sum of their elements (absolute value)} \usage{ -sort_gamma(gamma) +sort_prototypes(prototypes) } \arguments{ -\item{gamma}{A set of prototypes} +\item{prototypes}{A set of prototypes} } \value{ The same set of prototypes but sorted by increasing sum of their elements (absolute value) @@ -17,5 +17,6 @@ Sorting the prototypes by increasing sum of their elements (absolute value) } \examples{ -sort_gamma(gamma = list(array(10, dim = c(3,3)), array(5, dim = c(3,3)), array(6, dim = c(3,3)))) +sort_prototypes(prototypes = list(array(10, dim = c(3,3)), +array(5, dim = c(3,3)), array(6, dim = c(3,3)))) } diff --git a/man/std_centroid.Rd b/man/std_centroid.Rd index eb3cf0b..9e95fa0 100644 --- a/man/std_centroid.Rd +++ b/man/std_centroid.Rd @@ -5,8 +5,8 @@ \title{Computation of the IS centroid standard deviation for 
different sets of prototypes.} \usage{ std_centroid( - outputs, - gamma_list, + data, + prototypes_list, density_ratio, distance_func = function(A1, A2) { return(sqrt(sum((A1 - A2)^2))) @@ -17,17 +17,17 @@ std_centroid( ) } \arguments{ -\item{outputs}{The output samples that need to be quantized. Useful only if cell_numbers == NULL.} +\item{data}{The data samples that need to be quantized. Useful only if cell_numbers == NULL.} -\item{gamma_list}{A list of gamma on which we want to evaluate the IS centroid standard deviation. Each gamma is a set of prototypes.} +\item{prototypes_list}{A list of sets of prototypes on which we want to evaluate the IS centroid standard deviation. Each element is a list of prototypes.} -\item{density_ratio}{density_ratio indicates the weight fX/g of each output.} +\item{density_ratio}{density_ratio indicates the weight fX/g of each data element.} -\item{distance_func}{A function computing a distance between two elements in the output spaces.} +\item{distance_func}{A function computing a distance between two data elements.} \item{cells}{The Voronoï cell numbers that we are investigating.} -\item{cell_numbers}{An optional list providing for each set of prototypes the voronoi cell number of every output.} +\item{cell_numbers}{An optional list providing for each set of prototypes the voronoi cell number of every data element.} \item{nv}{The size of the sample for which we want to estimate the IS centroid standard deviation.} } @@ -47,11 +47,11 @@ Ymaps<- array(unlist(Y),dim=c(20,20,n)) return(Ymaps) } design = data.frame(X = seq(-1,1,l= 50)) -outputs = func2D(design) -gamma_list = list(lapply(c(1,3,10,14,18), function(i){outputs[,,i]})) +data = func2D(design) +prototypes_list = list(lapply(c(1,3,10,14,18), function(i){data[,,i]})) density_ratio = rep(1, 50) distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))} -list_std_centroid = std_centroid(outputs = outputs, gamma_list = -gamma_list, density_ratio = density_ratio, distance_func = distance_func -, cells = 1:length(gamma_list[[1]]), nv = 50) +list_std_centroid = std_centroid(data = data, prototypes_list = +prototypes_list, density_ratio = density_ratio, distance_func = distance_func +, cells = 1:length(prototypes_list[[1]]), nv = 50) } diff --git a/man/std_proba.Rd b/man/std_proba.Rd index 65ee188..ff2d904 100644 --- a/man/std_proba.Rd +++ b/man/std_proba.Rd @@ -5,8 +5,8 @@ \title{Computation of the IS coefficients of variation of the membership probability for different set of prototypes.} \usage{ std_proba( - outputs, - gamma_list, + data, + prototypes_list, density_ratio, distance_func = function(A1, A2) { return(sqrt(sum((A1 - A2)^2))) @@ -17,17 +17,17 @@ std_proba( ) } \arguments{ -\item{outputs}{The output samples that need to be quantized. Useful only if cell_numbers == NULL.} +\item{data}{The data that needs to be quantized. Useful only if cell_numbers == NULL.} -\item{gamma_list}{A list of gamma on which we want to evaluate the IS coefficient of variation of the membership probability. Each gamma is a set of prototypes} +\item{prototypes_list}{A list of sets of prototypes on which we want to evaluate the IS coefficient of variation of the membership probability. 
Each element is a list of prototypes.} -\item{density_ratio}{density_ratio indicates the weight fX/g of each output} +\item{density_ratio}{density_ratio indicates the weight fX/g of each data element.} -\item{distance_func}{A function computing a distance between two elements in the output spaces.} +\item{distance_func}{A function computing a distance between two data elements.} \item{cells}{The Voronoï cell numbers that we are investigating} -\item{cell_numbers}{An optional list providing for each set of prototypes the voronoi cell number of every output.} +\item{cell_numbers}{An optional list providing for each set of prototypes the voronoi cell number of every data element.} \item{nv}{The size of the sample for which we want to estimate the IS coefficient of variation of the membership probability} } @@ -47,11 +47,11 @@ Ymaps<- array(unlist(Y),dim=c(20,20,n)) return(Ymaps) } design = data.frame(X = seq(-1,1,l= 50)) -outputs = func2D(design) -gamma_list = list(lapply(c(1,3,10,14,18), function(i){outputs[,,i]})) +data = func2D(design) +prototypes_list = list(lapply(c(1,3,10,14,18), function(i){data[,,i]})) density_ratio = rep(1, 50) distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))} -list_std_proba = std_proba(outputs = outputs, gamma_list = gamma_list, +list_std_proba = std_proba(data = data, prototypes_list = prototypes_list, density_ratio = density_ratio, distance_func = distance_func, - cells = 1:length(gamma_list[[1]]), nv = 50) + cells = 1:length(prototypes_list[[1]]), nv = 50) } diff --git a/vignettes/long_example.Rmd b/vignettes/long_example.Rmd index 89c8297..f30c3a1 100644 --- a/vignettes/long_example.Rmd +++ b/vignettes/long_example.Rmd @@ -45,11 +45,11 @@ We create a function to plot the spatial outputs ```{r} plot_map = function(map, max = NULL, min = NULL){ - gamma_toplot = expand.grid(seq(-5,5,l=20),seq(-5,5,l=20)) - gamma_toplot$f = as.numeric(map) + prototypes_toplot = expand.grid(seq(-5,5,l=20),seq(-5,5,l=20)) + prototypes_toplot$f = as.numeric(map) if(is.null(max)) - {p = ggplot(gamma_toplot) + geom_raster(aes(x = Var1, y = Var2, fill = f)) + scale_fill_continuous(type = "viridis",direction = -1, name = "h") + theme_bw()} - else{p = ggplot(gamma_toplot) + geom_raster(aes(x = Var1, y = Var2, fill = f)) + scale_fill_continuous(type = "viridis", direction = -1, limits = c(min, max), name = "h") + theme_bw() + theme(legend.text = element_text(size=13),legend.title = element_text(size=13))} + {p = ggplot(prototypes_toplot) + geom_raster(aes(x = Var1, y = Var2, fill = f)) + scale_fill_continuous(type = "viridis",direction = -1, name = "h") + theme_bw()} + else{p = ggplot(prototypes_toplot) + geom_raster(aes(x = Var1, y = Var2, fill = f)) + scale_fill_continuous(type = "viridis", direction = -1, limits = c(min, max), name = "h") + theme_bw() + theme(legend.text = element_text(size=13),legend.title = element_text(size=13))} return(p) } ``` @@ -115,10 +115,10 @@ We perform the quantization without metamodel, only with the 400 computed maps ```{r} sum_depth = Vectorize(function(i){sum(outputs[,,i])})(1:dim(outputs)[3]) -gamma = lapply(1:5, function(i){outputs[,,which.min(abs(as.numeric(quantile(sum_depth,c(0,0.6,0.7,0.8,0.9))[i]) - sum_depth))[1]]}) #gamma will be the starting set of prototypes of the quantization. 
We choose an maps of only 0, and the maps with increasing sum of pixels +starting_proto = lapply(1:5, function(i){outputs[,,which.min(abs(as.numeric(quantile(sum_depth,c(0,0.6,0.7,0.8,0.9))[i]) - sum_depth))[1]]}) #starting_proto will be the starting set of prototypes of the quantization. We choose a map with only zeros, and maps with increasing sums of pixels -res_proto = proto_map_algo(gamma = gamma, outputs = outputs, density_ratio = density_ratio, distance_func = distance_func) -gamma_star_apriori = res_proto$gamma #This is the optimal set of prototype maps obtained with the 400 maps without metamodel +res_proto = find_prototypes(starting_proto = starting_proto, data = outputs, density_ratio = density_ratio, distance_func = distance_func) +prototypes_apriori = res_proto$prototypes #This is the optimal set of prototype maps obtained with the 400 maps without metamodel ``` @@ -142,10 +142,10 @@ best_params plot_map(list_rmse_k_fold$outputs_rmse[[2]]) ``` -We can also tune the hyperparameters regarding the relative error made when computing the Voronoï cells membership probabilities (for a given set of prototypes gamma) with the predicted maps instead of the true maps. We use the function probas_k_fold to this end. +We can also tune the hyperparameters regarding the relative error made when computing the Voronoï cell membership probabilities (for a given set of prototypes) with the predicted maps instead of the true maps. We use the function probas_k_fold to this end. ```{r} -list_probas_k_fold = probas_k_fold(outputs = outputs[,, sum_depth > 0], design = design[sum_depth > 0,], nb_folds = 10, density_ratio = density_ratio[sum_depth >0], gamma = gamma_star_apriori, distance_func = distance_func, npc_vec = npc_vec, ncoeff_vec = ncoeff_vec, seed = 10, control = list(trace = FALSE)) +list_probas_k_fold = probas_k_fold(outputs = outputs[,, sum_depth > 0], design = design[sum_depth > 0,], nb_folds = 10, density_ratio = density_ratio[sum_depth >0], prototypes = prototypes_apriori, distance_func = distance_func, npc_vec = npc_vec, ncoeff_vec = ncoeff_vec, seed = 10, control = list(trace = FALSE)) ``` @@ -168,7 +168,7 @@ plot_map(list_rf_rmse_k_fold$outputs_rmse[[1]]) Likewise, rf_probas_k_fold does the same as probas_k_fold but integrates the random forests classifier, focusing on the classifier hyperparameters. ```{r} -list_rf_probas_k_fold = rf_probas_k_fold(design = design,outputs = outputs, density_ratio = density_ratio, gamma = gamma_star_apriori, distance_func = distance_func,threshold_classification = 0, list_search = list_search, nb_folds = 10, ncoeff = 200, npc = 2, control = list(trace = FALSE), seed = 10) +list_rf_probas_k_fold = rf_probas_k_fold(design = design,outputs = outputs, density_ratio = density_ratio, prototypes = prototypes_apriori, distance_func = distance_func,threshold_classification = 0, list_search = list_search, nb_folds = 10, ncoeff = 200, npc = 2, control = list(trace = FALSE), seed = 10) ``` @@ -191,15 +191,15 @@ And we can perform the quantization on our $10^4$ predicted maps. 
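As a minimal sketch (not in the original vignette), the same quantization could also be launched without an explicit starting set: when nb_cells and multistart are supplied, as in the 1D example, find_prototypes draws its own starting prototypes. This assumes outputs_pred, density_ratio_pred and distance_func are defined as in the previous chunks.

```{r}
# Hypothetical multistart call: find_prototypes picks its own starting
# prototypes for 5 Voronoi cells and 2 starts, instead of starting from
# prototypes_apriori as in the chunk below.
res_proto_multistart = find_prototypes(nb_cells = 5, data = outputs_pred,
                                       multistart = 2,
                                       density_ratio = density_ratio_pred,
                                       distance_func = distance_func)
res_proto_multistart$probas # probability mass of each resulting Voronoi cell
```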
```{r} # We perform quantization -res_proto_pred = proto_map_algo(gamma = gamma_star_apriori, outputs = outputs_pred, density_ratio = density_ratio_pred, distance_func = distance_func, trace = FALSE) +res_proto_pred = find_prototypes(starting_proto = prototypes_apriori, data = outputs_pred, density_ratio = density_ratio_pred, distance_func = distance_func, trace = FALSE) ``` We plot the obtained prototype maps ```{r} list_plots_pred = list() -for(i in 1:length(res_proto_pred$gamma)){ - list_plots_pred[[i]] = plot_map(res_proto_pred$gamma[[i]]) +for(i in 1:length(res_proto_pred$prototypes)){ + list_plots_pred[[i]] = plot_map(res_proto_pred$prototypes[[i]]) } do.call("grid.arrange", c(list_plots_pred, ncol=2)) ``` @@ -208,7 +208,7 @@ do.call("grid.arrange", c(list_plots_pred, ncol=2)) We estimate the relative standard deviation of the membership probabilities of the obtained optimal Voronoi cells. ```{r} -res_std_probas = std_proba(outputs_pred, gamma_list = list(res_proto_pred$gamma), density_ratio = density_ratio_pred, distance_func = distance_func, cells = 1:5, nv = 10^4) +res_std_probas = std_proba(outputs_pred, prototypes_list = list(res_proto_pred$prototypes), density_ratio = density_ratio_pred, distance_func = distance_func, cells = 1:5, nv = 10^4) res_std_probas ``` @@ -216,13 +216,13 @@ res_std_probas We also estimate the standard deviation of the estimation of each optimal prototype pixel ```{r} -res_std_centroid = std_centroid(outputs = outputs_pred, gamma_list = list(res_proto_pred$gamma), density_ratio = density_ratio_pred, distance_func = distance_func, cells = 1:5, nv = 10^4) +res_std_centroid = std_centroid(data = outputs_pred, prototypes_list = list(res_proto_pred$prototypes), density_ratio = density_ratio_pred, distance_func = distance_func, cells = 1:5, nv = 10^4) #Then we compute the quantile 90% of each of these standard deviation maps quantiles_90_std = sapply(res_std_centroid[[1]], function(x){quantile(x, 0.9)}) list_plots_std = list() -for(i in 1:length(res_proto_pred$gamma)){ +for(i in 1:length(res_proto_pred$prototypes)){ list_plots_std[[i]] = plot_map(res_std_centroid[[1]][[i]]) } do.call("grid.arrange", c(list_plots_std, ncol=2)) @@ -233,7 +233,7 @@ do.call("grid.arrange", c(list_plots_std, ncol=2)) We now confirm the precision of the metamodel by comparing the probabilities obtained with the true maps and the predicted maps for the optimal prototypes ```{r} -probas_true = get_probas(density_ratio = density_ratio_pred, gamma = res_proto_pred$gamma, outputs = outputs_test, distance_func = distance_func) +probas_true = get_probas(density_ratio = density_ratio_pred, prototypes = res_proto_pred$prototypes, data = outputs_test, distance_func = distance_func) probas_pred = res_proto_pred$probas probas_true @@ -243,9 +243,9 @@ probas_pred We also check that the quantization error obtained with the predicted maps is close to the one obtained with the true maps ```{r} -res_proto_test = proto_map_algo(gamma = gamma_star_apriori, outputs = outputs_test, density_ratio = density_ratio_pred, distance_func = distance_func, trace = FALSE) -quanti_error_test = quanti_error(outputs = outputs_test, gamma = res_proto_test$gamma, density_ratio = density_ratio_pred, distance_func = distance_func) -quanti_error_pred = quanti_error(outputs = outputs_test, gamma = res_proto_pred$gamma, density_ratio = density_ratio_pred, distance_func = distance_func) +res_proto_test = find_prototypes(starting_proto = prototypes_apriori, data = outputs_test, density_ratio = density_ratio_pred, 
distance_func = distance_func, trace = FALSE) +quanti_error_test = quanti_error(data = outputs_test, prototypes = res_proto_test$prototypes, density_ratio = density_ratio_pred, distance_func = distance_func) +quanti_error_pred = quanti_error(data = outputs_test, prototypes = res_proto_pred$prototypes, density_ratio = density_ratio_pred, distance_func = distance_func) quanti_error_pred quanti_error_test diff --git a/vignettes/short_example.Rmd b/vignettes/short_example.Rmd index 90d074b..08cddf1 100644 --- a/vignettes/short_example.Rmd +++ b/vignettes/short_example.Rmd @@ -43,11 +43,11 @@ We create a function to plot the spatial outputs ```{r} plot_map = function(map, max = NULL, min = NULL){ - gamma_toplot = expand.grid(seq(-5,5,l=20),seq(-5,5,l=20)) - gamma_toplot$f = as.numeric(map) + prototypes_toplot = expand.grid(seq(-5,5,l=20),seq(-5,5,l=20)) + prototypes_toplot$f = as.numeric(map) if(is.null(max)) - {p = ggplot(gamma_toplot) + geom_raster(aes(x = Var1, y = Var2, fill = f)) + scale_fill_continuous(type = "viridis",direction = -1, name = "h") + theme_bw()} - else{p = ggplot(gamma_toplot) + geom_raster(aes(x = Var1, y = Var2, fill = f)) + scale_fill_continuous(type = "viridis", direction = -1, limits = c(min, max), name = "h") + theme_bw() + theme(legend.text = element_text(size=13),legend.title = element_text(size=13))} + {p = ggplot(prototypes_toplot) + geom_raster(aes(x = Var1, y = Var2, fill = f)) + scale_fill_continuous(type = "viridis",direction = -1, name = "h") + theme_bw()} + else{p = ggplot(prototypes_toplot) + geom_raster(aes(x = Var1, y = Var2, fill = f)) + scale_fill_continuous(type = "viridis", direction = -1, limits = c(min, max), name = "h") + theme_bw() + theme(legend.text = element_text(size=13),legend.title = element_text(size=13))} return(p) } ``` @@ -63,13 +63,13 @@ We can first perform quantization without considering the probabilistic distribu ```{r} sum_depth = Vectorize(function(i){sum(outputs[,,i])})(1:dim(outputs)[3]) -gamma = lapply(1:5, function(i){outputs[,,which.min(abs(as.numeric(quantile(sum_depth,c(0,0.6,0.7,0.8,0.9))[i]) - sum_depth))[1]]}) #gamma will be the starting set of prototypes of the quantization. We choose an maps of only 0, and the maps with increasing sum of pixels +starting_proto = lapply(1:5, function(i){outputs[,,which.min(abs(as.numeric(quantile(sum_depth,c(0,0.6,0.7,0.8,0.9))[i]) - sum_depth))[1]]}) #starting_proto will be the starting set of prototypes of the quantization. 
We choose a map with only zeros, and maps with increasing sums of pixels -res_proto_1 = proto_map_algo(gamma = gamma, outputs = outputs) +res_proto_1 = find_prototypes(starting_proto = starting_proto, data = outputs) list_plots_1 = list() -for(i in 1:length(res_proto_1$gamma)){ - list_plots_1[[i]] = plot_map(res_proto_1$gamma[[i]]) +for(i in 1:length(res_proto_1$prototypes)){ + list_plots_1[[i]] = plot_map(res_proto_1$prototypes[[i]]) } do.call("grid.arrange", c(list_plots_1, ncol=2)) @@ -104,11 +104,11 @@ Then we can perform a probabilistic quantization ```{r} density_ratio = compute_density_ratio(f = fX, g = g, inputs = design) -res_proto_2 = proto_map_algo(gamma = gamma, outputs = outputs, density_ratio = density_ratio) +res_proto_2 = find_prototypes(starting_proto = starting_proto, data = outputs, density_ratio = density_ratio) list_plots_2 = list() -for(i in 1:length(res_proto_2$gamma)){ - list_plots_2[[i]] = plot_map(res_proto_2$gamma[[i]]) +for(i in 1:length(res_proto_2$prototypes)){ + list_plots_2[[i]] = plot_map(res_proto_2$prototypes[[i]]) } do.call("grid.arrange", c(list_plots_2, ncol=2)) diff --git a/vignettes/short_example_1D.Rmd b/vignettes/short_example_1D.Rmd index 1f05b5e..0d8d231 100644 --- a/vignettes/short_example_1D.Rmd +++ b/vignettes/short_example_1D.Rmd @@ -35,7 +35,7 @@ We can first perform quantization without considering the probabilistic distribu ```{r} -res_proto = proto_map_algo(nb_cells = 4, outputs = outputs, multistart = 6) +res_proto = find_prototypes(nb_cells = 4, data = outputs, multistart = 6) res_proto ``` @@ -68,7 +68,7 @@ Then we can perform a probabilistic quantization ```{r} density_ratio = compute_density_ratio(f = fX, g = g, inputs = design) -res_proto_2 = proto_map_algo(nb_cells = 4, outputs = outputs, multistart = 6,density_ratio = density_ratio) +res_proto_2 = find_prototypes(nb_cells = 4, data = outputs, multistart = 6, density_ratio = density_ratio) res_proto_2
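# Optional check (an illustrative sketch, not part of the original vignette):
# assuming outputs, density_ratio, res_proto and res_proto_2 exist as above,
# compare the probability masses of the two solutions and their empirical
# quantization errors (quanti_error, with its default distance function).
res_proto$probas
res_proto_2$probas
quanti_error(data = outputs, prototypes = res_proto$prototypes,
             density_ratio = rep(1, length(density_ratio)))
quanti_error(data = outputs, prototypes = res_proto_2$prototypes,
             density_ratio = density_ratio)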