Skip to content

Commit

Permalink
formation
Browse files Browse the repository at this point in the history
  • Loading branch information
SIRE Charlie committed Sep 19, 2023
1 parent 81d17dd commit 72782d4
Show file tree
Hide file tree
Showing 13 changed files with 49 additions and 26 deletions.
11 changes: 7 additions & 4 deletions R/distinct_prototypes.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@
#' @examples
#' distinct_prototypes(list(1,2,34,1))
distinct_prototypes = function(prototypes){
for(i in 1:(length(prototypes)-1)){
dist_prototypes = distance_to_prototypes(prototypes[[i]], lapply((i+1):length(prototypes), function(j){prototypes[[j]]}))$dist
if(dist_prototypes == 0){return(FALSE)}
if(length(prototypes) == 1){return(TRUE)}
else{
for(i in 1:(length(prototypes)-1)){
dist_prototypes = distance_to_prototypes(prototypes[[i]], lapply((i+1):length(prototypes), function(j){prototypes[[j]]}))$dist
if(dist_prototypes == 0){return(FALSE)}
}
return(TRUE)
}
return(TRUE)
}
31 changes: 24 additions & 7 deletions R/find_prototypes.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#' @param threshold A real positive number. When the distance between the new centroids and the previous ones is lower than this value, then we stop the algorithm.
#' @param trace A boolean. If TRUE, tracing information on the progress of the algorithm is produced. Default is FALSE.
#' @param bias A vector indicating the bias that came out when computing the importance sampling estimators of the membership probabilities. Each element of the vector is associated to a Voronoi cell. Default is 0 for all Voronoi cells.
#' @param index_sampling_error Required only if method_IS = "percell". Indicates which of the data samples must be used for the computation of the quantization error.
#' @param all_starts A boolean indicating whether the function should return the optimal prototypes obtained for each start.
#' @param seed An optional random seed.
#' @param batch A boolean indicating whether the computations must be performed by batch or not. If TRUE, data, cell_numbers and density_ratio must be lists. Default is False.
Expand Down Expand Up @@ -44,7 +43,7 @@
#' find_prototypes(nb_cells = 3, data = data,
#' multistart = 2, distance_func = distance_func)

find_prototypes = function(starting_proto = NULL, nb_cells = NULL, data = NULL, multistart = 1, method_IS = "unique", sampling_cells = 1:length(data), density_ratio = rep(1, dim(data)[length(dim(data))]), budget = 10^3, threshold = 0, distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))},print_progress = FALSE, trace = FALSE, all_starts = FALSE,bias = NULL, index_sampling_error = 1, seed = NULL, batch = FALSE, density_function = NULL, density_biased_function = NULL, inputs_ref = NULL, data_ref = NULL, inputs_function = NULL, outputs_function = NULL){
find_prototypes = function(starting_proto = NULL, nb_cells = NULL, data = NULL, multistart = 1, method_IS = "unique", sampling_cells = 1:length(data), density_ratio = rep(1, dim(data)[length(dim(data))]), budget = 10^3, threshold = 0, distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))},print_progress = FALSE, trace = FALSE, all_starts = FALSE,bias = NULL, seed = NULL, batch = FALSE, density_function = NULL, density_biased_function = NULL, inputs_ref = NULL, data_ref = NULL, inputs_function = NULL, outputs_function = NULL){
data_bool = is.null(data)
if(is.null(dim(data)) & !is.null(data)){data = t(as.matrix(data))}
if(!is.null(seed)){set.seed(seed)}
Expand All @@ -56,10 +55,28 @@ find_prototypes = function(starting_proto = NULL, nb_cells = NULL, data = NULL,
if(print_progress){print(paste("start number", it))}
if(is.null(starting_proto)){
if(is.null(nb_cells)){stop("nb_cells must me provided if starting_proto is not")}
are_distinct = FALSE
while(are_distinct == FALSE){
prototypes_it = lapply(sample(x = 1:dim(data)[length(dim(data))], size = nb_cells), function(i){asub(data,dims = length(dim(data)), idx = i)})
are_distinct = distinct_prototypes(prototypes_it)
samples_init = c()
prototypes_it = list()
while(length(prototypes_it) < nb_cells){
if(method_IS == "unique"){
idx = sample(x = setdiff(1:dim(data)[length(dim(data))], samples_init), size = 1)
samples_init = c(samples_init, idx)
prototypes_prov = c(prototypes_it, list(asub(data,dims = length(dim(data)), idx = idx)))
if(distinct_prototypes(prototypes_prov)){prototypes_it = prototypes_prov}
}
else if(!data_bool & method_IS == "percell"){
idx_sample = sample(1:length(data), size = 1)
idx = sample(x = 1:dim(data[[idx_sample]])[length(dim(data[[idx_sample]]))], size = 1)
samples_init = c(samples_init, idx)
prototypes_prov = c(prototypes_it, list(asub(data[[idx_sample]],dims = length(dim(data[[idx_sample]])), idx = idx)))
if(distinct_prototypes(prototypes_prov)){prototypes_it = prototypes_prov}
}
else if(data_bool & method_IS == "percell"){
idx = sample(x = setdiff(1:dim(data_ref)[length(dim(data_ref))], samples_init), size = 1)
samples_init = c(samples_init, idx)
prototypes_prov = c(prototypes_it, list(asub(data_ref,dims = length(dim(data_ref)), idx = idx)))
if(distinct_prototypes(prototypes_prov)){prototypes_it = prototypes_prov}
}
}
}
else{
Expand Down Expand Up @@ -100,7 +117,7 @@ find_prototypes = function(starting_proto = NULL, nb_cells = NULL, data = NULL,
}
if(method_IS == "percell"){
batch_size = NULL
error = quanti_error(data = data[[index_sampling_error]], prototypes = prototypes_it, density_ratio = density_ratio[[index_sampling_error]], batch_size = batch_size, distance_func = distance_func)
error = quanti_error(data = data, prototypes = prototypes_it, density_ratio = density_ratio, batch_size = batch_size, distance_func = distance_func, method_IS = method_IS, sampling_cells = sampling_cells)
}
else if(method_IS == "unique"){
if(batch){batch_size = dim(data[[1]])[length(dim(data[[1]]))]}
Expand Down
2 changes: 1 addition & 1 deletion R/fit_metamodel.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
#' control_classification = list(nodesize = 4), threshold_classification = 2)

fit_metamodel = function(design_train, outputs_train, seed = NULL, ncoeff,npc,kernel="matern5_2", wf = "d4", boundary = "periodic",J=1,
regmodel = "constant", normalize = FALSE, optim = "BFGS", objective = "LL", parameters = NULL,noise=FALSE, nugget = FALSE, classification = FALSE, control_classification = NULL,threshold_classification = NULL,threshold_fpca = NULL){
regmodel = "constant", normalize = FALSE, optim = "BFGS", objective = "LL", parameters = NULL,noise=FALSE, nugget = FALSE, classification = FALSE, control_classification = NULL,threshold_classification = 0,threshold_fpca = NULL){
if(is.null(threshold_fpca)){threshold_fpca = threshold_classification}
pred_fpca = TRUE
if(classification){
Expand Down
2 changes: 2 additions & 0 deletions R/probas_loo.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,5 +86,7 @@ probas_loo = function(outputs, density_ratio, prototypes, distance_func = functi
probas_pred_df = rbind(probas_pred_df, c(as.numeric(grid_cv[i,]), probas_pred_cv))
relative_error_df = rbind(relative_error_df, c(as.numeric(grid_cv[i,]), abs(probas_pred_cv - probas_true)/probas_true))
}
colnames(relative_error_df) = c("ncoeff", "npc", 1:(ncol(relative_error_df)-2))
colnames(probas_pred_df) = c("ncoeff", "npc", 1:(ncol(probas_pred_df)-2))
return(list(probas_pred = probas_pred_df, error = relative_error_df, outputs_pred = outputs_loo_list))
}
2 changes: 2 additions & 0 deletions R/probas_training_test.R
Original file line number Diff line number Diff line change
Expand Up @@ -91,5 +91,7 @@ probas_training_test = function(outputs_train,outputs_test, density_ratio, proto
probas_pred_df = rbind(probas_pred_df, c(as.numeric(grid_cv[i,]), probas_pred_cv))
relative_error_df = rbind(relative_error_df, c(as.numeric(grid_cv[i,]), abs(probas_pred_cv - probas_true)/probas_true))
}
colnames(relative_error_df) = c("ncoeff", "npc", 1:(ncol(relative_error_df)-2))
colnames(probas_pred_df) = c("ncoeff", "npc", 1:(ncol(probas_pred_df)-2))
return(list(probas_pred = probas_pred_df, error = relative_error_df, model_tuning = model_tuning, outputs_pred = outputs_pred_list))
}
2 changes: 2 additions & 0 deletions R/rf_proba_k_fold.R
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ rf_probas_k_fold = function(design, outputs, threshold_classification, threshold
probas_pred_df = rbind(probas_pred_df,probas_pred_cv)
relative_error_df = rbind(relative_error_df, abs(probas_pred_cv - probas_true)/probas_true)
}
colnames(relative_error_df) = c("ncoeff", "npc", 1:(ncol(relative_error_df)-2))
colnames(probas_pred_df) = c("ncoeff", "npc", 1:(ncol(probas_pred_df)-2))
if(return_pred == FALSE){outputs_pred = NULL}
return(list(list_search = list_search,probas_pred = probas_pred_df, error = relative_error_df, outputs_pred = outputs_pred))
}
Expand Down
2 changes: 2 additions & 0 deletions R/rf_proba_training_test.R
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ rf_probas_training_test = function(design_train, design_test, outputs_train, out
relative_error[probas_pred_cv != 0 & probas_true == 0] = Inf
relative_error_df = rbind(relative_error_df, relative_error)
}
colnames(relative_error_df) = c("ncoeff", "npc", 1:(ncol(relative_error_df)-2))
colnames(probas_pred_df) = c("ncoeff", "npc", 1:(ncol(probas_pred_df)-2))
if(return_pred == FALSE){outputs_pred = NULL}
return(list(list_search = list_search,probas_pred = probas_pred_df, error = relative_error_df, outputs_pred = outputs_pred))
}
7 changes: 2 additions & 5 deletions R/std_centroid.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
#' list_std_centroid = std_centroid(data = data, prototypes_list =
#' prototypes_list, density_ratio = density_ratio, distance_func = distance_func
#' , cells = 1:length(prototypes_list[[1]]), nv = 50)
std_centroid = function(data = NULL, prototypes_list, density_ratio = rep(1,dim(data)[length(data)]), distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))},cells, cell_numbers = NULL, nv,outputs_function = NULL, inputs = NULL, batch_size = nrow(inputs), return_cell_numbers = FALSE, bootstrap = NULL){
std_centroid = function(data = NULL, prototypes_list, density_ratio = rep(1,dim(data)[length(dim(data))]), distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))},cells, cell_numbers = NULL, nv = NULL,outputs_function = NULL, inputs = NULL, batch_size = nrow(inputs), return_cell_numbers = FALSE, bootstrap = NULL){
if(is.null(nv)){nv = length(density_ratio)}
bool_cell_numbers = is.null(cell_numbers)
std_ratio_list = list()
if(is.null(data)){
Expand Down Expand Up @@ -92,10 +93,6 @@ std_centroid = function(data = NULL, prototypes_list, density_ratio = rep(1,dim(
for(boot in 1:bootstrap){
idxs = sample(1:length(cell_numbers_it), size = length(cell_numbers_it), replace = TRUE)
cell_numbers_boot = cell_numbers_it[idxs]
data <<- asub(data,dims = length(dim(data)), idx = idxs)
density_ratio <<- density_ratio[idxs]
cell_numbers <<- cell_numbers_boot
aa <<- compute_centroids_and_proba(data = asub(data,dims = length(dim(data)), idx = idxs), density_ratio = density_ratio[idxs], method_IS = "unique",cell_numbers = cell_numbers_boot, cells = cells)$centroids
list_centro_boot[[boot]] = compute_centroids_and_proba(data = asub(data,dims = length(dim(data)), idx = idxs), density_ratio = density_ratio[idxs], method_IS = "unique",cell_numbers = cell_numbers_boot, cells = cells)$centroids
}
for(cell in 1:length(cells)){
Expand Down
3 changes: 2 additions & 1 deletion R/std_proba.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
#' list_std_proba = std_proba(data = data, prototypes_list = prototypes_list,
#' density_ratio = density_ratio, distance_func = distance_func,
#' cells = 1:length(prototypes_list[[1]]), nv = 50)
std_proba = function(data = NULL, prototypes_list, density_ratio = rep(1,dim(data)[length(data)]), distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))},cells, cell_numbers = NULL, nv, outputs_function = NULL, inputs = NULL, batch_size = NULL, return_cell_numbers = FALSE, bootstrap = NULL){
std_proba = function(data = NULL, prototypes_list, density_ratio = rep(1,dim(data)[length(dim(data))]), distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))},cells, cell_numbers = NULL, nv = NULL, outputs_function = NULL, inputs = NULL, batch_size = NULL, return_cell_numbers = FALSE, bootstrap = NULL){
if(is.null(nv)){nv = length(density_ratio)}
std_list = list()
if(is.null(data) & is.null(cell_numbers)){
nb_batch = nrow(inputs)%/%batch_size
Expand Down
3 changes: 0 additions & 3 deletions man/find_prototypes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/fit_metamodel.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/std_centroid.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/std_proba.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 72782d4

Please sign in to comment.