change notations

charliesire · Mar 14, 2023 · c0b289f · c0b289f
1 parent 31cb46e
commit c0b289f
Show file tree

Hide file tree

Showing 56 changed files with 562 additions and 519 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,12 +1,12 @@
 # Generated by roxygen2: do not edit by hand
 
-export(compute_centroids_and_proba)
 export(compute_density_ratio)
 export(create_models_tuning)
-export(distance_to_gamma)
-export(distinct_gamma)
+export(distance_to_prototypes)
+export(distinct_prototypes)
 export(estim_denom_centroid)
 export(estim_num_centroid)
+export(find_prototypes)
 export(fit_metamodel)
 export(get_cell_numbers)
 export(get_probas)
@@ -15,7 +15,6 @@ export(predict_outputs)
 export(probas_k_fold)
 export(probas_loo)
 export(probas_training_test)
-export(proto_map_algo)
 export(quanti_error)
 export(rf_pred_k_fold)
 export(rf_pred_training_test)
@@ -26,7 +25,7 @@ export(rf_rmse_training_test)
 export(rmse_k_fold)
 export(rmse_loo)
 export(rmse_training_test)
-export(sort_gamma)
+export(sort_prototypes)
 export(std_centroid)
 export(std_proba)
 import(DiceKriging)

diff --git a/R/1d_to_matrix.R b/R/1d_to_matrix.R
@@ -1,15 +1,15 @@
 #' @title Transform every 1D prototype into a column matrix
 #'
-#' @param gamma a list of prototypes
+#' @param prototypes a list of prototypes
 #'
 #' @return A list of matrices prototypes
 #' @export
 #'
 #' @examples
 #' oned_to_matrix(list(1:5, runif(5), rep(0,5)))
-oned_to_matrix = function(gamma){
-  gamma = lapply(1:length(gamma), function(j){
-    if(is.null(dim(gamma[[j]])) | length(dim(gamma[[j]])) == 1){t(as.matrix(gamma[[j]]))}
-    else{gamma[[j]]}})
-  return(gamma)
+oned_to_matrix = function(prototypes){
+  prototypes = lapply(1:length(prototypes), function(j){
+    if(is.null(dim(prototypes[[j]])) | length(dim(prototypes[[j]])) == 1){t(as.matrix(prototypes[[j]]))}
+    else{prototypes[[j]]}})
+  return(prototypes)
 }
diff --git a/R/compute_centroids_and_proba.R b/R/compute_centroids_and_proba.R
@@ -1,39 +1,34 @@
 #' @title Compute the centroid and the probability mass of the Voronoï cells
 #'
-#' @param outputs The output samples that need to be quantized. If method = "percell", a list of output samples must be provided, of length equal to the number of Voronoï cells.
-#' @param cell_numbers The voronoi cell number of every output
+#' @param data The data that needs to be quantized. If method_IS = "percell", a list of data samples must be provided, of length equal to the number of Voronoï cells.
+#' @param cell_numbers The voronoi cell number of every data element
 #' @param method_IS The method of Importance Sampling : "unique" means there is a unique biased density involved, "percell" means there is one biased density (and then one biased sample) for each cell.
-#' @param density_ratio A vector indicating the weight fX/g of each output. Default is a vector of 1. If method = "percell", a list of density_ratio must be provided, of length equal to the number of Voronoï cells.
+#' @param density_ratio A vector indicating the weight fX/g of each data element. Default is a vector of 1. If method_IS = "percell", a list of density_ratio must be provided, of length equal to the number of Voronoï cells.
 #' @param bias A vector indicating the bias that came out when computing the importance sampling estimators of the membership probabilities. Each element of the vector is associated to a Voronoi cell. Default is 0 for all Voronoi cells.
+#' @param batch A boolean indicating whether the computations must be performed by batch or not. If TRUE, data, cell_numbers and density_ratio must be lists. Default is FALSE.
 
 #' @return The centroid and the probability mass of each probability cell
-#' @export
 #' @import abind
-#' @examples
-#' outputs = array(runif(9*20)*15, dim = c(3,3,20))
-#' cell_numbers = c(1,3,2,1,2,1,1,2,3,3,2,2,2,2,2,3,1,1,3,3)
-#' density_ratio = rep(1,20)
-#' compute_centroids_and_proba(outputs = outputs,cell_numbers = cell_numbers,
-#' density_ratio = density_ratio)
 
-compute_centroids_and_proba = function(outputs, cell_numbers, method_IS = "unique", density_ratio = rep(1, dim(outputs)[length(dim(outputs))]), bias = rep(0,length(unique(unlist(cell_numbers))))){
-  n = length(cell_numbers)#nb of outputs
+compute_centroids_and_proba = function(data, cell_numbers, method_IS = "unique", density_ratio = rep(1, dim(data)[length(dim(data))]), bias = rep(0,length(unique(unlist(cell_numbers)))), batch = FALSE){
+  n = length(unlist(cell_numbers))
   nb_cells = length(unique(unlist(cell_numbers)))
   centroids = list()
   probas = c()
   for(j in 1:nb_cells){
     if(method_IS == "unique"){
-      outputs_j = outputs
+      data_j = data
       cell_numbers_j = cell_numbers
       density_j = density_ratio
     }
     else if(method_IS == "percell"){
-      outputs_j = outputs[[j]]
+      data_j = data[[j]]
       cell_numbers_j = cell_numbers[[j]]
       density_j = density_ratio[[j]]
     }
-    numerator =  estim_num_centroid(outputs = outputs_j, cell_numbers = cell_numbers_j, density_ratio = density_j, cell = j) ## Sum the Y(X)f/nu of the cell
-    denominator = estim_denom_centroid(density_ratio = density_j, cell_numbers = cell_numbers_j, cell = j, bias = bias[j])
+    numerator =  estim_num_centroid(data = data_j, cell_numbers = cell_numbers_j, density_ratio = density_j, cell = j, batch = batch) ## Sum the Y(X)f/nu of the cell
+    if(batch){denominator = sum(Vectorize(function(p){estim_denom_centroid(density_ratio = density_j[[p]], cell_numbers = cell_numbers_j[[p]], cell = j, bias = bias[j])})(1:length(density_j)))}
+    else{denominator = estim_denom_centroid(density_ratio = density_j, cell_numbers = cell_numbers_j, cell = j, bias = bias[j])}
     centroids[[j]] = numerator/denominator
     probas = c(probas, denominator/n)
   }

diff --git a/R/create_model_tuning.R → R/create_models_tuning.R b/R/create_model_tuning.R → R/create_models_tuning.R
diff --git a/R/density_ratio.R b/R/density_ratio.R
@@ -5,6 +5,7 @@
 #' @param inputs The value of the sampled inputs
 #'
 #' @return A vector with the weights fX/g of the inputs
+#' @import foreach
 #' @export
 #'
 #' @examples
@@ -13,8 +14,8 @@
 #' inputs = array(rnorm(30), dim = c(10,3))
 #' compute_density_ratio(f,g, inputs)
 compute_density_ratio = function(f, g, inputs){
-  res = foreach(i = 1:nrow(inputs), .combine = 'c')%dopar%{
-    as.numeric(f(inputs[i,])/g(inputs[i,]))
+  res = foreach(it = 1:nrow(inputs), .combine = 'c')%dopar%{
+    as.numeric(f(inputs[it,])/g(inputs[it,]))
     }
   return(res)
 }

diff --git a/R/distance2gamma.R b/R/distance2gamma.R
diff --git a/R/distance_to_prototypes.R b/R/distance_to_prototypes.R
@@ -0,0 +1,17 @@
+#' @title Compute the distance between a point and its nearest prototype, returning this distance and the associated cell number
+#'
+#' @param x A point in the space of the data elements
+#' @param prototypes A set of prototypes
+#' @param distance_func A function computing a distance between two data elements
+#'
+#' @return A list with two elements: cellule, the index of the nearest prototype, and dist, the distance between the point and that prototype
+#' @export
+#'
+#' @examples
+#' distance_to_prototypes(array(1:9, dim = c(3,3)), list(array(10, dim = c(3,3)),
+#' array(5, dim = c(3,3)), array(6, dim = c(3,3))),
+#' distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))})
+distance_to_prototypes = function(x, prototypes, distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))}){
+  dist_to_each = sapply(1:length(prototypes), function(idx){distance_func(x, prototypes[[idx]])}) # one distance per prototype
+  return(list(cellule = which.min(dist_to_each), dist = min(dist_to_each))) # index of the nearest prototype and its distance
+}
diff --git a/R/distinct_gamma.R b/R/distinct_gamma.R
diff --git a/R/distinct_prototypes.R b/R/distinct_prototypes.R
@@ -0,0 +1,16 @@
+#' @title Check that all the elements in a list of prototypes are distinct
+#'
+#' @param prototypes A list of prototypes. A list with fewer than two elements is considered distinct.
+#'
+#' @return FALSE if at least two prototypes are at Euclidean distance 0 from each other, TRUE otherwise
+#' @export
+#'
+#' @examples
+#' distinct_prototypes(list(1,2,34,1))
+distinct_prototypes = function(prototypes){
+  for(i in seq_len(max(length(prototypes)-1, 0))){ # seq_len: no iteration for 0 or 1 prototypes, where 1:(n-1) would index out of bounds
+    dist_prototypes = distance_to_prototypes(prototypes[[i]], prototypes[(i+1):length(prototypes)])$dist # distance from prototype i to the nearest later prototype
+    if(dist_prototypes == 0){return(FALSE)}
+  }
+  return(TRUE)
+}
diff --git a/R/error_quanti.R b/R/error_quanti.R
@@ -1,24 +1,32 @@
 #' @title Computation of the empirical quantization error
 #'
 
-#' @param outputs The output samples that need to be quantized. Useful only if cell_numbers == NULL.
-#' @param gamma A set of prototypes. Useful only if cell_numbers == NULL.
-#' @param density_ratio density_ratio indicates the weight fX/g of each output
-#' @param distance_func A function computing a distance between two elements in the output spaces. Useful only if cell_numbers == NULL.
-
+#' @param data The data that needs to be quantized. Useful only if cell_numbers == NULL.
+#' @param prototypes A set of prototypes. Useful only if cell_numbers == NULL.
+#' @param density_ratio density_ratio indicates the weight fX/g of each data element.
+#' @param distance_func A function computing a distance between two data elements. Useful only if cell_numbers == NULL.
+#' @param batch A boolean indicating whether the computations must be performed by batch or not. If TRUE, data and density_ratio must be lists with one element per batch. Default is FALSE.
+#'
 #' @return An estimation of the quantization error
 #' @export
 #' @import abind
 #' @examples
-#' gamma = list(array(10, dim = c(3,3)), array(5, dim = c(3,3)), array(6, dim = c(3,3)))
-#' outputs = array(runif(9*20)*20, dim = c(3,3,20))
+#' prototypes = list(array(10, dim = c(3,3)), array(5, dim = c(3,3)), array(6, dim = c(3,3)))
+#' data = array(runif(9*20)*20, dim = c(3,3,20))
 #' density_ratio = rep(1,20)
 #' distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))}
-#' quanti_error(outputs = outputs, gamma = gamma, density_ratio = density_ratio,
+#' quanti_error(data = data, prototypes = prototypes, density_ratio = density_ratio,
 #' distance_func = distance_func)
-quanti_error = function(outputs, gamma, density_ratio, distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))}){
+quanti_error = function(data, prototypes, density_ratio, batch = FALSE, distance_func = function(A1,A2){return(sqrt(sum((A1-A2)^2)))}){
   drop = "selected"
-  if(sum(dim(gamma[[1]]) == 1) == length(dim(gamma[[1]]))){drop = FALSE}
-  distances = Vectorize(function(it){distance_to_gamma(x = asub(x = outputs, dims = length(dim(outputs)), idx = it,drop = drop), gamma = gamma, distance_func = distance_func)$dist})(1:dim(outputs)[length(dim(outputs))])
-  return(sqrt(mean(distances^2*density_ratio)))
+  if(sum(dim(prototypes[[1]]) == 1) == length(dim(prototypes[[1]]))){drop = FALSE}
+  if(!batch){
+    distances = Vectorize(function(it){distance_to_prototypes(x = asub(x = data, dims = length(dim(data)), idx = it,drop = drop), prototypes = prototypes, distance_func = distance_func)$dist})(1:dim(data)[length(dim(data))])
+    res = sqrt(mean(distances^2*density_ratio))
+  }
+  else{
+    distances = as.numeric(sapply(1:length(data), function(b){Vectorize(function(it){distance_to_prototypes(x = asub(x = data[[b]], dims = length(dim(data[[b]])), idx = it,drop = drop), prototypes = prototypes, distance_func = distance_func)$dist})(1:dim(data[[b]])[length(dim(data[[b]]))])}))
+    res = sqrt(mean(distances^2*unlist(density_ratio)))
+  }
+  return(res)
 }
diff --git a/R/estim_denom_centroid.R b/R/estim_denom_centroid.R
@@ -1,7 +1,7 @@
 #' @title Compute the estimator which is the denominator of the centroid estimation
 #'
-#' @param density_ratio density_ratio indicates the weight fX/g of each output
-#' @param cell_numbers The output samples that need to be quantized
+#' @param density_ratio density_ratio indicates the weight fX/g of each data element
+#' @param cell_numbers The voronoi cell number of every data element.
 #' @param cell The cell number of the computed centroid
 #' @param bias A number indicating the bias that came out when computing the importance sampling estimators of the membership probabilities of the Voronoi cell. Default is 0.
 #'

diff --git a/R/estim_num_centroid.R b/R/estim_num_centroid.R
@@ -1,27 +1,41 @@
 #' @title Compute the estimator which is the numerator of the centroid estimation
 #'
-#' @param outputs The output samples that need to be quantized
-#' @param cell_numbers The voronoi cell number of every output
-#' @param density_ratio density_ratio indicates the weight fX/g of each output
+#' @param data The data that needs to be quantized
+#' @param cell_numbers The voronoi cell number of every data element
+#' @param density_ratio density_ratio indicates the weight fX/g of each data element
 #' @param cell The cell number of the computed centroid
-#'
-#' @return An array having the same dimension as an output, which is the numerator of the centroid estimator
+#' @param batch A boolean indicating whether the computations must be performed by batch or not. If TRUE, data, cell_numbers and density_ratio must be lists. Default is FALSE.
+#' @return An array having the same dimension as a data element, which is the numerator of the centroid estimator
 #' @export
 #' @import abind
 #' @examples
-#' outputs = array(runif(9*20)*15, dim = c(3,3,20))
+#' data = array(runif(9*20)*15, dim = c(3,3,20))
 #' cell_numbers = c(1,3,2,1,2,1,1,2,3,3,2,2,2,2,2,3,1,1,3,3)
 #' density_ratio = rep(1,20)
 #' cell = 3
-#' estim_num_centroid(outputs = outputs,cell_numbers = cell_numbers,
+#' estim_num_centroid(data = data,cell_numbers = cell_numbers,
 #' density_ratio = density_ratio, cell = cell)
-estim_num_centroid = function(outputs, cell_numbers, density_ratio, cell){
-  outputs_cell = asub(x = outputs, dims = length(dim(outputs)), idx = which(cell_numbers == cell))
-  outputs_cell = matrix(outputs_cell, nrow = prod(dim(outputs)[1:(length(dim(outputs))-1)]))
-  outputs_cell = t(outputs_cell)
-  res = outputs_cell*density_ratio[cell_numbers == cell]
-  res = apply(res,2,sum)
-  res = array(res, dim = dim(outputs)[1:(length(dim(outputs))-1)])
+estim_num_centroid = function(data, cell_numbers, density_ratio, cell, batch = FALSE){
+  if(batch){
+    res = 0
+    for(batch_i in 1:length(data)){
+      data_cell = asub(x = data[[batch_i]], dims = length(dim(data[[batch_i]])), idx = which(cell_numbers[[batch_i]] == cell))
+      data_cell = matrix(data_cell, nrow = prod(dim(data[[batch_i]])[1:(length(dim(data[[batch_i]]))-1)]))
+      data_cell = t(data_cell)
+      data_cell = data_cell*density_ratio[[batch_i]][cell_numbers[[batch_i]] == cell]
+      res = res + apply(data_cell,2,sum)
+    }
+    res = array(res, dim = dim(data[[1]])[1:(length(dim(data[[1]]))-1)])
+
+  }
+  else{
+  data_cell = asub(x = data, dims = length(dim(data)), idx = which(cell_numbers == cell))
+  data_cell = matrix(data_cell, nrow = prod(dim(data)[1:(length(dim(data))-1)]))
+  data_cell = t(data_cell)
+  data_cell = data_cell*density_ratio[cell_numbers == cell]
+  res = apply(data_cell,2,sum)
+  res = array(res, dim = dim(data)[1:(length(dim(data))-1)])
+  }
   return(res)
 }