Add method, function, and plots for EFSA criteria: PPC (function and …

…plot), and NRMSE. Results of EFSA criteria in validation plots are now rounded at 2 significant digits after the decimal
bgoussen · Nov 9, 2021 · 84dee6f · 84dee6f
1 parent 9e0d30a
commit 84dee6f
Show file tree

Hide file tree

Showing 8 changed files with 229 additions and 5 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -4,15 +4,19 @@ S3method(plot,beeSurvData)
 S3method(plot,beeSurvFit)
 S3method(plot,beeSurvPred)
 S3method(plot,beeSurvValidation)
+S3method(plot,ppc)
+S3method(ppc,beeSurvFit)
 S3method(predict,beeSurvFit)
 S3method(summary,beeSurvFit)
 S3method(traceplot,beeSurvFit)
 S3method(validate,beeSurvFit)
 export(concAC)
 export(concAO)
 export(concCst)
+export(criteriaCheck)
 export(dataGUTS)
 export(fitBeeGUTS)
+export(ppc)
 export(traceplot)
 export(validate)
 import(Rcpp)

diff --git a/R/EFSAcriteria.R b/R/EFSAcriteria.R
@@ -0,0 +1,51 @@
+#' Computes PPC and NRMSE as defined in EFSA 2018
+#'
+#' @param x an object of class \code{beeSurvFit} or \code{beeSurvPred}
+#'
+#' @return The function returns a list with three items:
+#' \item{PPC}{The criterion, in percent, compares the predicted median number of survivors associated
+#' to their uncertainty limits with the observed numbers of survivors.
+#' Based on experience, PPC resulting in more than \eqn{50\%} of the
+#' observations within the uncertainty limits indicate good model performance (EFSA 2018). A fit of
+#' \eqn{100\%} may hide too large uncertainties of prediction (so covering all data).}
+#' \item{PPC_global}{percentage of PPC for the whole data set by gathering data types.}
+#' \item{NRMSE}{The criterion, in percent, is based on the classical root-mean-square error (RMSE),
+#'  used to aggregate the magnitudes of the errors in predictions for various time-points
+#'  into a single measure of predictive power. In order to provide a criterion expressed
+#'   as a percentage, NRMSE is the normalised RMSE by the mean of the observations.
+#'   EFSA (2018) recognised that a NRMSE of less than 50% indicates good model performance}
+#'
+#'
+#'  @references
+#'  EFSA PPR Scientific Opinion (2018)
+#' \emph{Scientific Opinion on the state of the art of Toxicokinetic/Toxicodynamic (TKTD) effect models for regulatory risk assessment of pesticides for aquatic organisms}
+#' \url{https://www.efsa.europa.eu/en/efsajournal/pub/5377}
+#'
+#' @export
+#'
+criteriaCheck<- function(x){
+
+  # --- PPC
+  dfGlobal<- ppc(x) %>%
+    dplyr::mutate(ppcMatching_valid = ifelse(value<q_0.025|value>q_0.975, 0, 1),
+                  SE_id = (value - median)^2)
+
+  dfPPC <- dfGlobal %>%
+    dplyr::select(data, ppcMatching_valid) %>%
+    dplyr::group_by(data) %>%
+    dplyr::summarise(PPC = mean(ppcMatching_valid)*100)
+
+  # --- NRMSE
+  dfNRMSE <- dfGlobal %>%
+    dplyr::select(value, data, SE_id) %>%
+    dplyr::group_by(data) %>%
+    dplyr::summarise(NRMSE = sqrt(mean(SE_id,  na.rm = TRUE)) / mean(value , na.rm = TRUE) * 100)
+
+
+  return(list(percentPPC = as.data.frame(dfPPC),
+              percentNRMSE = as.data.frame(dfNRMSE))
+
+  )
+}
+
+
diff --git a/R/plotBeeGUTS.R b/R/plotBeeGUTS.R
@@ -221,14 +221,14 @@ plot.beeSurvValidation <- function(x,
 
 
   EFSA_criteria <- x$EFSA$Percent_PPC
+  EFSA_criteria$PPC <- round(EFSA_criteria$PPC, digits = 2)
   EFSA_criteria$PPC_global <- ""
-  EFSA_criteria$PPC_global[1] <- x$EFSA$Percent_PPC_global
-  EFSA_criteria$NRMSE <- x$EFSA$Percent_NRMSE$NRMSE
+  EFSA_criteria$PPC_global[1] <- round(x$EFSA$Percent_PPC_global, digits = 2)
+  EFSA_criteria$NRMSE <- round(x$EFSA$Percent_NRMSE$NRMSE, digits = 2)
   EFSA_criteria$NRMSE_global <- ""
-  EFSA_criteria$NRMSE_global[1] <- x$EFSA$Percent_NRMSE_global
-  EFSA_criteria$SPPE <-  x$EFSA$Percent_SPPE$SPPE
+  EFSA_criteria$NRMSE_global[1] <- round(x$EFSA$Percent_NRMSE_global, digits = 2)
+  EFSA_criteria$SPPE <-  round(x$EFSA$Percent_SPPE$SPPE, digits = 2)
   ###############################################
-
   colnames(x$sim)[3] <- "Treatment" # Rename column name for plotting purposes
 
   ggSurv <- ggplot(data = x$sim, aes(x = time, y = Nsurv_q50_valid,  group = Treatment)) +
@@ -401,3 +401,41 @@ traceplot.beeSurvFit <- function(object, ..., incWarmup_trace = TRUE, incWarmup_
   return(ggOut)
 }
 
+
+
+
+
+
+#' Plotting method for \code{ppc} objects
+#'
+#' @param x An object of class \code{ppc}.
+#' @param data_type  A string designating the type of data to be plotted: \code{length},
+#' \code{reproduction} or \code{exposure}
+#' @param \dots  Further arguments to be passed to generic methods.
+#'
+#' @return  an object of class \code{ggplot}.
+#'
+#'
+#' @export
+plot.ppc <- function(x, ...) {
+  Nsurv_ppc <- x
+  ppc_pct<- round(nrow(Nsurv_ppc[Nsurv_ppc$col=="green",])/nrow(Nsurv_ppc)*100, digits = 2)
+  nrmse<- round(sqrt(sum((Nsurv_ppc$value-Nsurv_ppc$median)^2, na.rm = TRUE)/nrow(Nsurv_ppc))/mean(Nsurv_ppc$value,na.rm = TRUE)*100, digits=2)
+
+  ggOut <-ggplot() +
+    geom_segment(aes(x = value, xend = value,
+                     y =q_0.025 , yend =q_0.975 ), data = Nsurv_ppc,
+                 color = Nsurv_ppc$col)+
+    geom_point(aes(x = value, y = median), Nsurv_ppc)+
+    geom_abline(intercept = 0, slope = 1, size=0.7)+
+    expand_limits(y = 0) +
+    expand_limits(x = 0) +
+    theme_minimal()+
+    coord_fixed(ratio=1)+
+    labs(x = "Observed number of survivors",
+         y= "Predicted number of survivors") +
+    theme(axis.title = element_text(size=7))+
+    ggtitle(paste0("Survival \nPPC= ",ppc_pct,"%", "\nNRMSE= ", nrmse,"%"))
+
+  return(ggOut)
+}
diff --git a/R/ppc.R b/R/ppc.R
@@ -0,0 +1,45 @@
+#' Generates an object to be used in posterior predictive check for \code{beeSurvFit}, \code{beeSurvPred}
+#'
+#' @param x  an object used to select a method \code{ppc}
+#'
+#' @export
+#'
+ppc <- function(x){
+  UseMethod("ppc")
+}
+
+
+
+#' Posterior predictive check method for \code{beeSurvFit} objects
+#'
+#' @param x an object of class \code{beeSurvFit}
+#'
+#'
+#' @return a \code{data.frame} of class \code{ppc}
+#'
+#' @examples
+#' @export
+#'
+ppc.beeSurvFit <- function(x){
+  NsurvPred_all<- as.data.frame(x$stanFit, pars = "Nsurv_ppc")
+  NsurvPred_quantiles<- NsurvPred_all%>%
+    tidyr::pivot_longer(cols = tidyr::starts_with('Nsurv'),
+                        names_to = "ppc",
+                        values_to = "value")%>%
+    dplyr::group_by(ppc)%>%
+    dplyr::summarise(median = stats::quantile(value,  0.5, na.rm = TRUE),
+                     q_0.025=stats::quantile(value,  0.025, na.rm = TRUE),
+                     q_0.975=stats::quantile(value,  0.975, na.rm = TRUE))
+
+  NsurvData_all<- data.frame(value=x$dataFit$Nsurv, id=seq(1,x$dataFit$nData_Nsurv, 1))%>%
+    dplyr::mutate(ppc=paste0("Nsurv_ppc[",id, "]"))
+
+  Nsurv_ppc<- dplyr::full_join( NsurvPred_quantiles,  NsurvData_all, by="ppc")%>%
+    dplyr::mutate(col=ifelse(value<q_0.025|value>q_0.975, "red", "green")) %>%
+    dplyr::arrange(id)
+
+  Nsurv_ppc$data<-"Survival"
+
+  class(Nsurv_ppc) <- c("ppc", class(Nsurv_ppc))
+  return(Nsurv_ppc)
+}
diff --git a/man/criteriaCheck.Rd b/man/criteriaCheck.Rd
diff --git a/man/plot.ppc.Rd b/man/plot.ppc.Rd
diff --git a/man/ppc.Rd b/man/ppc.Rd
diff --git a/man/ppc.beeSurvFit.Rd b/man/ppc.beeSurvFit.Rd