From 0957f95c0d7cd9bd3e8d4cc9aaac5031eceeeaff Mon Sep 17 00:00:00 2001 From: Samer Mouksassi Date: Thu, 4 Jan 2024 14:50:26 +0200 Subject: [PATCH] intial adding of gglogisticexpdist and an example dataset --- DESCRIPTION | 6 +- NAMESPACE | 3 + R/gglogisticexpdist.R | 558 +++++++++++++++++++++++++++++++++++++ R/gglogisticexpdist.R.bak | 571 ++++++++++++++++++++++++++++++++++++++ R/logisticdata.R | 24 ++ man/gglogisticexpdist.Rd | 176 ++++++++++++ man/logistic_data.Rd | 36 +++ 7 files changed, 1373 insertions(+), 1 deletion(-) create mode 100644 R/gglogisticexpdist.R create mode 100644 R/gglogisticexpdist.R.bak create mode 100644 R/logisticdata.R create mode 100644 man/gglogisticexpdist.Rd create mode 100644 man/logistic_data.Rd diff --git a/DESCRIPTION b/DESCRIPTION index b3b00ad..e2763e0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -53,7 +53,11 @@ Imports: table1 (>= 1.4.2), zoo, shinyFiles, - RPostgres + RPostgres, + forcats, + ggridges, + rms, + tibble Suggests: knitr, rmarkdown diff --git a/NAMESPACE b/NAMESPACE index 891c269..d0c848b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,7 @@ export(geom_kmband) export(geom_kmticks) export(get_source_code) export(ggkmrisktable) +export(gglogisticexpdist) export(merge_steps) export(run_ggquickeda) export(sourceable) @@ -40,5 +41,7 @@ importFrom(scales,percent) importFrom(scales,percent_format) importFrom(scales,trans_breaks) importFrom(scales,trans_format) +importFrom(stats,median) +importFrom(stats,quantile) importFrom(survival,Surv) importFrom(survival,survfit) diff --git a/R/gglogisticexpdist.R b/R/gglogisticexpdist.R new file mode 100644 index 0000000..338d30b --- /dev/null +++ b/R/gglogisticexpdist.R @@ -0,0 +1,558 @@ +#' @importFrom rlang `:=` +#' @importFrom rlang .data +#' @importFrom rlang sym +#' @import tidyr +#' @importFrom stats median +#' @importFrom stats quantile + +summary_df <- function(x,y, probs = c(0.25,0.75,0.90,0.10)) { + N = Ntot = prob = NULL + tibble::tibble( + minexp = min(x), + maxexp = max(x), + medexp = stats::median(x), + meanexp = mean(x), + N = sum(y,na.rm=TRUE), + Nmiss = length(x[is.na(x)]), + Ntot = dplyr::n(), + prob = N/Ntot, + SE = sqrt(prob*(1-prob)/Ntot ), + values = stats::quantile(x, probs, na.rm = TRUE), + quant = probs + ) +} +plogis <- function(x) exp(x)/(1+exp(x)) + +#' gglogisticexpdist +#' +#' Produces a logistic fit plot with a facettable exposures/quantiles/distributions in ggplot2 +#' @param data Data to use with multiple endpoints stacked into Endpoint(endpoint name), response 0/1 +#' @param response name of the column holding the valuesresponse 0/1 +#' @param endpoint name of the column holding the name/key of the endpoint default to `Endpoint` +#' @param DOSE name of the column holding the DOSE values default to `DOSE` +#' @param exposure_metrics name(s) of the column(s) to be stacked into `expname` `exptile` and split into `exposure_metric_split` +#' @param exposure_metric_split one of "median", "tertile", "quartile", "none" +#' @param exposure_metric_soc_value special exposure code for standard of care default -99 +#' @param exposure_metric_plac_value special exposure code for placebo default 0 +#' @param exposure_distribution one of distributions, lineranges or none +#' @param dose_plac_value string identifying placebo in DOSE column +#' @param xlab text to be used as x axis label +#' @param ylab text to be used as y axis label +#' @param prob_text_size probability text size default to 5 +#' @param prob_obs_bydose TRUE/FALSE +#' @param N_text_size N responders/Ntotal by exposure bin text size default to 5 +#' @param binlimits_text_size 5 binlimits text size +#' @param dist_position_scaler space occupied by the distribution default to 0.2 +#' @param dist_offset offset where the distribution position starts 0 +#' @param lineranges_ypos where to put the lineranges 0.2 +#' @param yproj project the probabilities on y axis TRUE/FALSE +#' @param yproj_xpos y projection x position 0 +#' @param yproj_dodge y projection dodge value 0.2 +#' @param yaxis_position where to put y axis "left" or "right" +#' @param facet_formula facet formula to be use otherwise endpoint ~ expname +#' @param theme_certara apply certara colors and format for strips and default colour/fill +#' @examples +#' # Example 1 +#' library(ggplot2) +#' effICGI <- logistic_data |> +#' dplyr::filter(!is.na(ICGI))|> +#' dplyr::filter(!is.na(AUC)) +#'effICGI$DOSE <- factor(effICGI$DOSE, +#' levels=c("0", "600", "1200","1800","2400"), +#' labels=c("Placebo", "600 mg", "1200 mg","1800 mg","2400 mg")) +#'effICGI$STUDY <- factor(effICGI$STUDY) +#'effICGI$ICGI2 <- effICGI$ICGI +#'effICGI <- tidyr::gather(effICGI,Endpoint,response,ICGI,ICGI2) +#' gglogisticexpdist(data = effICGI, # long format filter to Endpoint of choice +#' response = "response", +#' endpoint = "Endpoint", +#' exposure_metrics = c("AUC","CMAX"), +#' exposure_metric_split = c("quartile"), +#' exposure_metric_soc_value = -99, +#' exposure_metric_plac_value = 0, +#' exposure_distribution ="distributions") +#' +#' # Example 2 +#'gglogisticexpdist(data = effICGI, # long format filter to Endpoint of choice +#'response = "response", +#'endpoint = "Endpoint", +#'DOSE = "DOSE", +#'exposure_metrics = c("AUC"), +#'exposure_metric_split = c("quartile"), +#'exposure_distribution ="lineranges", +#'exposure_metric_soc_value = -99, +#'exposure_metric_plac_value = 0, +#'lineranges_ypos = -0.2, +#'prob_obs_bydose = TRUE, +#'yproj_xpos = -10, +#'yproj_dodge = 10, +#'dist_position_scaler = 0.15) +#' # Example 3 +#'library(ggh4x) +#'gglogisticexpdist(data = effICGI |> +#' dplyr::filter(Endpoint=="ICGI"), +#' response = "response", +#' endpoint = "Endpoint", +#' DOSE = "DOSE", +#' exposure_metrics = c("AUC"), +#' exposure_metric_split = c("quartile"), +#' exposure_distribution ="distributions", +#' exposure_metric_soc_value = -99, +#' exposure_metric_plac_value = 0, +#' dist_position_scaler = 0.15)+ +#' facet_grid2(Endpoint~expname+DOSE2,scales="free", +#' margins = "DOSE2",strip = strip_nested()) +#' # Example 4 +#'gglogisticexpdist(data = effICGI, +#' response = "response", +#' endpoint = "Endpoint", +#' DOSE = "DOSE", +#' exposure_metrics = c("AUC"), +#' exposure_metric_split = c("quartile"), +#' exposure_distribution ="distributions", +#' exposure_metric_soc_value = -99, +#' exposure_metric_plac_value = 0, +#' lineranges_ypos = -0.2, +#' yproj_xpos = -10, +#' yproj_dodge = 20, +#' prob_text_size = 9, +#' binlimits_text_size = 6, +#' N_text_size = 5, +#' dist_position_scaler = 0.15)+ +#' ggplot2::scale_x_continuous(breaks = seq(0,350,50), +#' expand = ggplot2::expansion(add= c(0,0),mult=c(0,0)))+ +#' ggplot2::coord_cartesian(xlim = c(-30,355))+ +#' ggplot2::facet_grid(~Endpoint) +#'\dontrun{ +#' #Example 5 +#' gglogisticexpdist(data = effICGI |> filter(Endpoint=="ICGI"), +#' response = "response", +#' endpoint = "Endpoint", +#' DOSE = "DOSE", +#' exposure_metrics = c("AUC"), +#' exposure_metric_split = c("quartile"), +#' exposure_distribution ="distributions", +#' exposure_metric_soc_value = -99, +#' exposure_metric_plac_value = 0, +#' dist_position_scaler = 0.15)+ +#' facet_grid(Endpoint~expname+exptile,scales="free", +#' margins = "exptile") +#'} +#' @export +gglogisticexpdist <- function(data = effICGI, + response = "response", + endpoint = "Endpoint", + DOSE = "DOSE", + exposure_metrics = c("AUC","CMAX"), + exposure_metric_split = c("median","tertile","quartile","none"), + exposure_metric_soc_value = -99, + exposure_metric_plac_value = 0, + exposure_distribution = c("distributions","lineranges","none"), + dose_plac_value = "Placebo", + xlab = "Exposure Values", + ylab ="Probability of Response", + prob_text_size = 5, + prob_obs_bydose = TRUE, + N_text_size = 5, + binlimits_text_size = 5, + dist_position_scaler = 0.2, + dist_offset = 0, + lineranges_ypos = 0.2, + yproj = TRUE, + yproj_xpos = 0, + yproj_dodge = 0.2, + yaxis_position = c("left","right"), + facet_formula = NULL, + theme_certara = TRUE +) { + + responseinputvar <- response + endpointinputvar <- endpoint + DOSEinputvar <- DOSE + exposure_metric_split <- match.arg(exposure_metric_split, several.ok = FALSE) + exposure_distribution <- match.arg(exposure_distribution, several.ok = FALSE) + yaxis_position <- match.arg(yaxis_position, several.ok = FALSE) + + effICGI = expname = expvalue = DOSE2 = quant = values = Ncat = Ntot = xmed = percentage = exptile = keynumeric = NULL + intercept = medexp = prob = SE = N = ndensity = Endpoint = NULL + + data <- data |> + dplyr::mutate(none = "(all)") # needed when no metric are chosen + + data.long <- data |> + tidyr::gather(expname,expvalue,!!!exposure_metrics, factor_key = TRUE) |> + dplyr::group_by(expname,!!sym(endpointinputvar) ) + + if(exposure_metric_split=="none") { + data.long <- data.long |> + dplyr::mutate(exptile = dplyr::case_when( + expvalue == exposure_metric_soc_value ~ NA, + expvalue == exposure_metric_plac_value ~ NA, + expvalue > exposure_metric_plac_value ~ expvalue)) + data.long$keynumeric<- - dist_position_scaler* as.numeric(forcats::fct_rev(as.factor(dplyr::pull(data.long[,DOSEinputvar])))) + dist_offset + xintercepts <- data.long|> + dplyr::group_by(expname,!!sym(endpointinputvar) )|> + dplyr::reframe(intercept = stats::quantile(expvalue[!expvalue %in% + c(exposure_metric_soc_value, exposure_metric_plac_value)], + c(0,1), na.rm=TRUE), + quant = c(0,1) ) + + } + if(exposure_metric_split=="quartile") { + data.long <- data.long |> + dplyr::mutate( + Q25 = stats::quantile(expvalue[!expvalue %in% c(exposure_metric_soc_value, + exposure_metric_plac_value)], 0.25, na.rm=TRUE), + Q50 = stats::quantile(expvalue[!expvalue %in% c(exposure_metric_soc_value, + exposure_metric_plac_value)], 0.50, na.rm=TRUE), + Q75 = stats::quantile(expvalue[!expvalue %in% c(exposure_metric_soc_value, + exposure_metric_plac_value)], 0.75, na.rm=TRUE)) |> + dplyr::mutate(exptile = dplyr::case_when( + expvalue == exposure_metric_soc_value ~ "SOC", + expvalue == exposure_metric_plac_value ~ "Placebo", + expvalue > exposure_metric_plac_value & + expvalue <= Q25 ~ "Q1", + expvalue > Q25 & expvalue <= Q50 ~ "Q2", + expvalue > Q50 & expvalue <= Q75 ~ "Q3", + expvalue > Q75 ~ "Q4")) + + data.long$keynumeric <- - dist_position_scaler*as.numeric(forcats::fct_rev(as.factor(dplyr::pull(data.long[,DOSEinputvar])))) + dist_offset + xintercepts <- data.long|> + dplyr::group_by(expname,!!sym(endpointinputvar) )|> + dplyr::reframe(intercept = stats::quantile(expvalue[!expvalue %in% + c(exposure_metric_soc_value, exposure_metric_plac_value)], + c(0,0.25,0.5,0.75,1), na.rm=TRUE), quant = c(0,0.25,0.5,0.75,1) ) + } + + if(exposure_metric_split=="tertile") { + data.long <- data.long |> + dplyr::mutate( + Q33 = stats::quantile(expvalue[!expvalue %in% c(exposure_metric_soc_value, + exposure_metric_plac_value)], 1/3, na.rm=TRUE), + Q66 = stats::quantile(expvalue[!expvalue %in% c(exposure_metric_soc_value, + exposure_metric_plac_value)], 2/3, na.rm=TRUE)) |> + dplyr::mutate(exptile = dplyr::case_when( + expvalue == exposure_metric_soc_value ~ "SOC", + expvalue == exposure_metric_plac_value ~" Placebo", + expvalue > exposure_metric_plac_value & + expvalue <= Q33 ~ "T1", + expvalue > Q33 & expvalue <= Q66 ~ "T2", + expvalue > Q66 ~ "T3")) + data.long$keynumeric <- - dist_position_scaler*as.numeric(forcats::fct_rev(as.factor(dplyr::pull(data.long[,DOSEinputvar])))) + dist_offset + xintercepts <- data.long|> + dplyr::group_by(expname,!!sym(endpointinputvar) )|> + dplyr::reframe(intercept = stats::quantile(expvalue[!expvalue %in% + c(exposure_metric_soc_value, exposure_metric_plac_value)], + c(0,1/3,2/3,1), na.rm=TRUE), quant = c(0,1/3,2/3,1)) + + } + if(exposure_metric_split=="median") { + data.long <- data.long |> + dplyr::mutate( + Q50 = stats::quantile(expvalue[!expvalue %in% c(exposure_metric_soc_value, + exposure_metric_plac_value)], 0.5, na.rm=TRUE)) |> + dplyr::mutate(exptile = dplyr::case_when( + expvalue == exposure_metric_soc_value ~"SOC", + expvalue == exposure_metric_plac_value ~"Placebo", + expvalue > 0 & expvalue <= Q50 ~ "M1", + expvalue > Q50 ~ "M2")) + data.long$keynumeric <- - dist_position_scaler*as.numeric(forcats::fct_rev(as.factor(dplyr::pull(data.long[,DOSEinputvar])))) + dist_offset + xintercepts <- data.long|> + dplyr::group_by(expname,!!sym(endpointinputvar) )|> + dplyr::reframe(intercept = stats::quantile(expvalue[!expvalue %in% + c(exposure_metric_soc_value, exposure_metric_plac_value)], + c(0,0.5,1), na.rm=TRUE), quant = c(0,0.5,1) ) + + } + data.long$exptile2 <- data.long$exptile + data.long[,"DOSE2"] <- data.long[,DOSEinputvar] + + data.long.summaries.dose <- data.long |> + dplyr::group_by(!!sym(endpointinputvar),expname,!!sym(DOSEinputvar),DOSE2)|> + dplyr::reframe( + summary_df(expvalue,!!sym(responseinputvar))) |> + tidyr::pivot_wider(names_from= quant,values_from = values,names_glue = "quant_{100*quant}") + + + loopvariables <- unique(c(endpointinputvar,"expname")) + data.long.summaries.dose <- tidyr::unite(data.long.summaries.dose,"loopvariable", !!!loopvariables, remove = FALSE) + data.long <- tidyr::unite(data.long,"loopvariable", !!!loopvariables, remove = FALSE) + + logisticfit_by_endpoint <- list() + predict_by_endpoint_expname <- list() + predict_by_endpoint_expname_dose <- list() + predict_by_endpoint_expname_dose2 <- list() + + for (i in unique(data.long[,"loopvariable"]) |> + dplyr::pull() |> + as.character() ) { + logisticregdata<- data.long |> + dplyr::filter(.data[["loopvariable"]] ==i) + d <- rms::datadist(logisticregdata[, c(endpointinputvar,responseinputvar,DOSEinputvar,"expname","expvalue","exptile")]) + options(datadist= d) + logisticfit_by_endpoint_fit <- eval(bquote( rms::lrm( as.formula(paste(responseinputvar,"~","expvalue")) , data=logisticregdata,x=TRUE,y=TRUE) )) + logisticfit_by_endpoint[[i]] <- logisticfit_by_endpoint_fit + + data.long.summaries.dose.loop <- data.long.summaries.dose |> + dplyr::filter(.data[["loopvariable"]] ==i) + pred10exp <- as.data.frame(rms::Predict(logisticfit_by_endpoint_fit,fun=plogis, + expvalue= data.long.summaries.dose.loop$quant_10)) + names(pred10exp)<- c("quant_10" , "ymid10", "ylow10", "yup10") + pred10exp$loopvariable <- i + pred90exp <- as.data.frame(rms::Predict(logisticfit_by_endpoint_fit,fun=plogis, + expvalue= data.long.summaries.dose.loop$quant_90)) + names(pred90exp)<- c("quant_90" , "ymid90", "ylow90", "yup90") + pred90exp$loopvariable <- i + pred25exp <- as.data.frame(rms::Predict(logisticfit_by_endpoint_fit,fun=plogis, + expvalue= data.long.summaries.dose.loop$quant_25)) + names(pred25exp)<- c("quant_25" , "ymid25", "ylow25", "yup25") + pred25exp$loopvariable <- i + pred75exp <- as.data.frame(rms::Predict(logisticfit_by_endpoint_fit,fun=plogis, + expvalue= data.long.summaries.dose.loop$quant_75)) + names(pred75exp)<- c("quant_75" , "ymid75", "ylow75", "yup75") + pred75exp$loopvariable <- i + pred50exp<- as.data.frame (rms::Predict(logisticfit_by_endpoint_fit,fun=plogis, + expvalue=data.long.summaries.dose.loop$medexp)) + names(pred50exp)<- c("medexp" , "ymid50", "ylow50", "yup50") + pred50exp$loopvariable <- i + + data.long.summaries.dose.loop<- dplyr::left_join(data.long.summaries.dose.loop,pred10exp) + data.long.summaries.dose.loop<- dplyr::left_join(data.long.summaries.dose.loop,pred90exp) + data.long.summaries.dose.loop<- dplyr::left_join(data.long.summaries.dose.loop,pred25exp) + data.long.summaries.dose.loop<- dplyr::left_join(data.long.summaries.dose.loop,pred75exp) + data.long.summaries.dose.loop<- dplyr::left_join(data.long.summaries.dose.loop,pred50exp) + predict_by_endpoint_expname[[i]] <- data.long.summaries.dose.loop + + predictionsbydose<- data.long.summaries.dose.loop |> + dplyr::group_by(!!sym(DOSEinputvar),!!sym(endpointinputvar),expname,DOSE2) |> + dplyr::do(as.data.frame(rms::Predict(logisticfit_by_endpoint_fit,fun=plogis, + expvalue=seq(.data$quant_10,.data$quant_90,0.1)))) + predict_by_endpoint_expname_dose[[i]] <- predictionsbydose + + predictionsbydose2<- data.long.summaries.dose.loop |> + dplyr::group_by(!!sym(DOSEinputvar),!!sym(endpointinputvar),expname,DOSE2) |> + dplyr::do(as.data.frame(rms::Predict(logisticfit_by_endpoint_fit,fun=plogis, + expvalue=seq(.data$quant_25,.data$quant_75,0.1)))) + predict_by_endpoint_expname_dose2[[i]] <- predictionsbydose2 + + } + predict_by_endpoint_expname <- data.table::rbindlist(predict_by_endpoint_expname) + predict_by_endpoint_expname_dose <- data.table::rbindlist(predict_by_endpoint_expname_dose) + predict_by_endpoint_expname_dose2 <- data.table::rbindlist(predict_by_endpoint_expname_dose2) + + + data.long.summaries.exposure <- data.long |> + dplyr::ungroup()|> + dplyr::group_by(!!sym(endpointinputvar),expname,exptile)|> + dplyr::reframe( + summary_df(expvalue,!!sym(responseinputvar))) |> + tidyr::pivot_wider(names_from= quant,values_from =values,names_glue = "quant_{100*quant}") + + percentineachbreakcategory <- data.long |> + dplyr::group_by(!!sym(endpointinputvar),expname,!!sym(DOSEinputvar),keynumeric,DOSE2)|> + dplyr::select(!!sym(endpointinputvar),expname,!!sym(DOSEinputvar),keynumeric,expvalue,exptile,DOSE2)|> + dplyr::group_by(!!sym(DOSEinputvar),keynumeric,expname,DOSE2) |> + dplyr::mutate(Ntot = dplyr::n())|> + dplyr::group_by(!!sym(DOSEinputvar),expname,exptile,keynumeric,DOSE2) |> + dplyr::mutate(Ncat = dplyr::n(),xmed=median(expvalue))|> + dplyr::mutate(percentage=Ncat/Ntot)|> + dplyr::distinct(!!sym(DOSEinputvar),xmed,exptile,expname,percentage,keynumeric,DOSE2)|> + dplyr::arrange(!!sym(DOSEinputvar)) + + facet_formula <- if (is.null(facet_formula) ) stats::as.formula( paste(endpointinputvar,"~","expname")) else + stats::as.formula(facet_formula) + #facet_formula <- Endpoint ~ expname + + p1 <- ggplot2::ggplot(data.long, + ggplot2::aes_string("expvalue", responseinputvar))+ + ggplot2::facet_grid(facet_formula, scales = "free")+ + #facet_nested(Endpoint~expname+DOSE2,scales="free",margins = "DOSE2")+ + ggplot2::geom_point(ggplot2::aes_string(col = DOSEinputvar), + alpha = 0.2, position = ggplot2::position_jitter(width = 0 , height = 0.05))+ + ggplot2::geom_hline(yintercept = c(0,1))+ + ggplot2::geom_vline(data = xintercepts, ggplot2::aes(xintercept = intercept), color = "gray70" )+ + ggplot2::geom_ribbon(data = data.long |> dplyr::mutate( DOSEinputvar := NULL, DOSE2 = NULL, exptile = NULL), + stat="smooth", + method = "glm", method.args = list(family = "binomial"), + color="transparent",linetype=0, alpha = 0.5, + ggplot2::aes(fill = "Logistic Fit 95% CI"))+ + ggplot2::geom_line(data = data.long |> dplyr::mutate( DOSEinputvar := NULL, DOSE2 = NULL, exptile = NULL), + stat="smooth", + method = "glm", method.args = list(family = "binomial"), + color="black", alpha = 0.5, + ggplot2::aes(linetype = "Logistic Fit 95% CI"))+ + ggplot2::geom_line(data = predict_by_endpoint_expname_dose, + ggplot2::aes_string(y = "yhat", col = DOSEinputvar), + alpha = 0.4, size = 2)+ + ggplot2::geom_line(data = predict_by_endpoint_expname_dose2, + ggplot2::aes_string(y = "yhat", col = DOSEinputvar), + alpha = 0.4, size = 2.5)+ + ggplot2::geom_point(data = predict_by_endpoint_expname, + ggplot2::aes_string(x = "medexp", y = "ymid50", col = DOSEinputvar), + alpha = 0.4, size = 5) + + + if(exposure_distribution=="lineranges") { + lineranges_ypos <- as.character(lineranges_ypos) + p1l <- p1 + + ggplot2::geom_linerange(data = data.long.summaries.dose, size = 2, alpha = 0.4, + ggplot2::aes_string(xmin = "quant_10", xmax = "quant_90",y = lineranges_ypos, + col = DOSEinputvar, group = DOSEinputvar), + position = ggstance::position_dodgev(height = 0.15),inherit.aes = FALSE)+ + ggplot2::geom_linerange(data = data.long.summaries.dose, size = 2.5, alpha = 0.4, + ggplot2::aes_string(xmin = "quant_25", xmax= "quant_75", y = lineranges_ypos, + col = DOSEinputvar, group = DOSEinputvar), + position = ggstance::position_dodgev(height = 0.15), inherit.aes = FALSE)+ + ggplot2::geom_point(data=data.long.summaries.dose, size = 5, alpha = 0.2, + ggplot2::aes_string(x="medexp",y = lineranges_ypos, + col = DOSEinputvar), + position = ggstance::position_dodgev(height = 0.15)) + } + if(exposure_distribution!="lineranges") { + p1l <- p1 + } + + p2e <- p1l + + ggplot2::geom_pointrange(data = data.long.summaries.exposure, size = 1, + ggplot2::aes(shape = "Observed probability by exposure split", + x = medexp, y = prob, ymin = prob+1.959*SE, ymax=prob-1.959*SE), + alpha = 0.5) + if(prob_obs_bydose){ + data.long.summaries.dose.plot <- data.long.summaries.dose + data.long.summaries.dose.plot[data.long.summaries.dose.plot[,DOSEinputvar]==dose_plac_value,"N"] <- NA + data.long.summaries.dose.plot[data.long.summaries.dose.plot[,DOSEinputvar]==dose_plac_value,"Ntot"] <- NA + data.long.summaries.dose.plot[data.long.summaries.dose.plot[,DOSEinputvar]==dose_plac_value,"prob"] <- NA + + p2d <- p2e + + ggplot2::geom_pointrange(data = data.long.summaries.dose.plot, alpha = 0.5, size = 1, + ggplot2::aes(x = medexp, y = prob, col = !!sym(DOSEinputvar), + ymin = prob+1.959*SE, ymax=prob-1.959*SE, + shape = "Observed probability by dose split"), + show.legend = FALSE) + + ggplot2::geom_text(data=data.long.summaries.dose.plot, vjust = 1, size = prob_text_size, show.legend = FALSE, + ggplot2::aes(x = medexp, y = prob, col = !!sym(DOSEinputvar), + label = paste( + paste("\n",100*round(prob,2),"%",sep=""), + "\n",N,"/",Ntot,sep="") + )) + + } + if(!prob_obs_bydose){ + p2d <- p2e + } + p2 <- p2d + + ggplot2::geom_text(data=data.long.summaries.exposure, vjust = 0, size = prob_text_size, show.legend = FALSE, + ggplot2::aes(x = medexp, y = prob, label = paste(100*round(prob,2),"%","\n",sep="")))+ + ggplot2::geom_text(data = xintercepts, ggplot2::aes(label=round(intercept,1), x = intercept, y = 0) , + vjust = 1, size = binlimits_text_size,color = "gray70")+ + ggplot2::geom_text(data = data.long.summaries.exposure, y = 0, vjust = 0, size = N_text_size, + ggplot2::aes(x = as.double(as.character(medexp)), label=paste(N,"/",Ntot,sep=""))) + + if(exposure_distribution=="distributions") { + data.long.ridges <- data.long + data.long.ridges[data.long.ridges[,DOSEinputvar]==dose_plac_value,"expvalue"] <- NA + p2d <- p2 + + ggridges::geom_density_ridges(data = data.long.ridges, + ggplot2::aes(x = expvalue, y = keynumeric, + group = !!rlang::sym(DOSEinputvar), + col = !!rlang::sym(DOSEinputvar), + height = ggplot2::after_stat(ndensity)), + rel_min_height = 0.05,alpha = 0.1, scale = 0.9, + quantile_lines = TRUE, quantiles = c(0.1,0.25, 0.5, 0.75,0.9))+ + ggplot2::geom_label(data = percentineachbreakcategory, + ggplot2::aes(color = !!rlang::sym(DOSEinputvar), y = keynumeric, x= xmed, label = round(100*percentage,0) ), + alpha = 0.5, show.legend = FALSE) + } + if(exposure_distribution!="distributions") { + p2d <- p2 + } + + if(yproj) { + yproj_xpos <- as.character(yproj_xpos) + + p2df <- p2d + + ggplot2::geom_linerange(data = predict_by_endpoint_expname, alpha = 0.4, size = 2, + ggplot2::aes_string(x = yproj_xpos, ymin = "ymid10", ymax = "ymid90", col = DOSEinputvar, group = DOSEinputvar), + position = ggplot2::position_dodge(width = yproj_dodge), inherit.aes = FALSE)+ + ggplot2::geom_linerange(data = predict_by_endpoint_expname, alpha = 0.4, size = 2.5, + ggplot2::aes_string(x = yproj_xpos, ymin = "ymid25", ymax = "ymid75", col= DOSEinputvar, group= DOSEinputvar), + position = ggplot2::position_dodge(width = yproj_dodge), inherit.aes = FALSE)+ + ggplot2::geom_point(data=predict_by_endpoint_expname, alpha = 0.4, size = 3, + ggplot2::aes_string(x = yproj_xpos, y = "ymid50", col = DOSEinputvar), + position = ggplot2::position_dodge(width = yproj_dodge), inherit.aes = FALSE) + } + if(!yproj) { + p2df <- p2d + } + if(exposure_distribution =="distributions"){ + p2df2 <- p2df + + ggplot2::scale_y_continuous(position = yaxis_position, + breaks =c(unique(data.long$keynumeric), + c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1) ), + labels= c(as.vector(unique(data.long$DOSE)), + c("0","0.1","0.2","0.3","0.4","0.5","0.6","0.7","0.8","0.9","1") ), + expand = ggplot2::expansion(mult=c(0.01,0.01), add =c(0, 0))) + } + if(exposure_distribution =="lineranges"){ + p2df2 <- p2df + + ggplot2::scale_y_continuous(position = yaxis_position, + breaks =c( + c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1) ), + labels= c( + c("0","0.1","0.2","0.3","0.4","0.5","0.6","0.7","0.8","0.9","1") ), + expand = ggplot2::expansion(mult=c(0.01,0.01), add =c(0, 0))) + } + p2df2 + + ggplot2::labs(fill="", linetype="", shape="", x = xlab, y = ylab) + + ggplot2::theme_bw(base_size = 18)+ + ggplot2::theme(legend.position = "top",strip.placement = "outside", + axis.title.y = ggplot2::element_blank())+ + ggplot2::scale_colour_manual(values = c( "#4682AC","#FDBB2F","#EE3124" ,"#336343","#7059a6", "#803333"), + drop=FALSE,na.value = "grey50")+ + ggplot2::scale_fill_manual( values = c("gray80", + "#4682AC","#FDBB2F","#EE3124" ,"#336343","#7059a6", "#803333"), + drop=FALSE,na.value = "grey50")+ + ggplot2::theme(strip.background = ggplot2::element_rect(fill="#475c6b"), + strip.text = ggplot2::element_text(face = "bold",color = "white")) + +} + +# library(ggquickeda) +# effICGI <- logistic_data |> +# dplyr::filter(!is.na(ICGI))|> +# dplyr::filter(!is.na(AUC)) +# effICGI$DOSE <- factor(effICGI$DOSE, +# levels=c("0", "600", "1200","1800","2400"), +# labels=c("Placebo", "600 mg", "1200 mg","1800 mg","2400 mg")) +# effICGI$STUDY <- factor(effICGI$STUDY) +# effICGI$ICGI2 <- effICGI$ICGI +# effICGI <- tidyr::gather(effICGI,Endpoint,response,ICGI,ICGI2) +# +# +# a <- gglogisticexpdist(data = effICGI, # long format filter to Endpoint of choice +# response = "response", +# endpoint = "Endpoint", +# DOSE = "DOSE",yproj_dodge = 36, +# exposure_metrics = c("AUC"), +# exposure_metric_split = c("quartile"), +# exposure_distribution ="lineranges", +# exposure_metric_soc_value = -99, +# exposure_metric_plac_value = 0) + +# facet_grid(Endpoint~expname,switch = "both") +# b <- gglogisticexpdist(data = effICGI, # long format filter to Endpoint of choice +# response = "response", +# endpoint = "Endpoint", +# DOSE = "DOSE",yproj_dodge = 2, +# exposure_metrics = c("CMAX"), +# exposure_metric_split = c("quartile"), +# exposure_distribution ="lineranges", +# exposure_metric_soc_value = -99, +# exposure_metric_plac_value = 0, +# yaxis_position = "right")+ +# facet_grid(Endpoint~expname,switch = "x")+ +# theme(strip.text.y.right = element_blank(), +# strip.background.y = element_blank()) +# library(patchwork) +# (a | b ) + +# plot_layout(guides = "collect")& +# theme(legend.position = "top") diff --git a/R/gglogisticexpdist.R.bak b/R/gglogisticexpdist.R.bak new file mode 100644 index 0000000..a740cc7 --- /dev/null +++ b/R/gglogisticexpdist.R.bak @@ -0,0 +1,571 @@ +#' @importFrom rlang `:=` +#' @importFrom rlang .data +#' @importFrom rlang sym +#' @import tidyr +#' @importFrom stats median +#' @importFrom stats quantile + +summary_df <- function(x,y, probs = c(0.25,0.75,0.90,0.10)) { + N = Ntot = prob = NULL + tibble::tibble( + minexp = min(x), + maxexp = max(x), + medexp = stats::median(x), + meanexp = mean(x), + N = sum(y,na.rm=TRUE), + Nmiss = length(x[is.na(x)]), + Ntot = dplyr::n(), + prob = N/Ntot, + SE = sqrt(prob*(1-prob)/Ntot ), + values = stats::quantile(x, probs, na.rm = TRUE), + quant = probs + ) +} +#plogis <- function(x) exp(x)/(1+exp(x)) + +# +# effICGI <- data(logistic_data) |> +# filter(!is.na(ICGI))|> +# filter(!is.na(AUC)) +# +# effICGI$DOSE <- factor(effICGI$DOSE) +# effICGI$DOSE <- factor(effICGI$DOSE, +# levels=c("0", "600", "1200","1800","2400"), +# labels=c("Placebo", "600 mg", "1200 mg","1800 mg","2400 mg")) +# effICGI$STUDY <- factor(effICGI$STUDY) +# effICGI$ICGI2 <- effICGI$ICGI +# effICGI <- gather(effICGI,Endpoint,response,ICGI,ICGI2) + +#' gglogisticexpdist +#' +#' Produces a logistic fit plot with a facettable exposures/quantiles/distributions in ggplot2 +#' @param data Data to use with multiple endpoints stacked into Endpoint(endpoint name), response 0/1 +#' @param response name of the column holding the valuesresponse 0/1 +#' @param endpoint name of the column holding the name/key of the endpoint default to `Endpoint` +#' @param DOSE name of the column holding the DOSE values default to `DOSE` +#' @param exposure_metrics name(s) of the column(s) to be stacked into `expname` `exptile` and split into `exposure_metric_split` +#' @param exposure_metric_split one of "median", "tertile", "quartile", "none" +#' @param exposure_metric_soc_value special exposure code for standard of care default -99 +#' @param exposure_metric_plac_value special exposure code for placebo default 0 +#' @param exposure_distribution one of distributions, lineranges or none +#' @param dose_plac_value string identifying placebo in DOSE column +#' @param xlab text to be used as x axis label +#' @param ylab text to be used as y axis label +#' @param prob_text_size probability text size default to 5 +#' @param prob_obs_bydose TRUE/FALSE +#' @param N_text_size N responders/Ntotal by exposure bin text size default to 5 +#' @param binlimits_text_size 5 binlimits text size +#' @param dist_position_scaler space occupied by the distribution default to 0.2 +#' @param dist_offset offset where the distribution position starts 0 +#' @param lineranges_ypos where to put the lineranges 0.2 +#' @param yproj project the probabilities on y axis TRUE/FALSE +#' @param yproj_xpos y projection x position 0 +#' @param yproj_dodge y projection dodge value 0.2 +#' @param yaxis_position where to put y axis "left" or "right" +#' @param facet_formula facet formula to be use otherwise endpoint ~ expname +#' @param theme_certara apply certara colors and format for strips and default colour/fill +#' @examples +#' # Example 1 +#' effICGI <- logistic_data |> +#' dplyr::filter(!is.na(ICGI))|> +#' dplyr::filter(!is.na(AUC)) +#'effICGI$DOSE <- factor(effICGI$DOSE, +#' levels=c("0", "600", "1200","1800","2400"), +#' labels=c("Placebo", "600 mg", "1200 mg","1800 mg","2400 mg")) +#'effICGI$STUDY <- factor(effICGI$STUDY) +#'effICGI$ICGI2 <- effICGI$ICGI +#'effICGI <- tidyr::gather(effICGI,Endpoint,response,ICGI,ICGI2) +#' gglogisticexpdist(data = effICGI, # long format filter to Endpoint of choice +#' response = "response", +#' endpoint = "Endpoint", +#' exposure_metrics = c("AUC","CMAX"), +#' exposure_metric_split = c("quartile"), +#' exposure_metric_soc_value = -99, +#' exposure_metric_plac_value = 0, +#' exposure_distribution ="distributions") +#' +#' # Example 2 +#'gglogisticexpdist(data = effICGI, # long format filter to Endpoint of choice +#'response = "response", +#'endpoint = "Endpoint", +#'DOSE = "DOSE", +#'exposure_metrics = c("AUC"), +#'exposure_metric_split = c("quartile"), +#'exposure_distribution ="lineranges", +#'exposure_metric_soc_value = -99, +#'exposure_metric_plac_value = 0, +#'lineranges_ypos = -0.2, +#'prob_obs_bydose = TRUE, +#'yproj_xpos = -10, +#'yproj_dodge = 10, +#'dist_position_scaler = 0.15) +#' +#' # Example 3 +#'library(ggh4x) +#'gglogisticexpdist(data = effICGI |> +#' filter(Endpoint=="ICGI"), +#' response = "response", +#' endpoint = "Endpoint", +#' DOSE = "DOSE", +#' exposure_metrics = c("AUC"), +#' exposure_metric_split = c("quartile"), +#' exposure_distribution ="distributions", +#' exposure_metric_soc_value = -99, +#' exposure_metric_plac_value = 0, +#' dist_position_scaler = 0.15)+ +#' facet_grid2(Endpoint~expname+DOSE2,scales="free", +#' margins = "DOSE2",strip = strip_nested()) +#' # Example 4 +#'gglogisticexpdist(data = effICGI, +#' response = "response", +#' endpoint = "Endpoint", +#' DOSE = "DOSE", +#' exposure_metrics = c("AUC"), +#' exposure_metric_split = c("quartile"), +#' exposure_distribution ="distributions", +#' exposure_metric_soc_value = -99, +#' exposure_metric_plac_value = 0, +#' lineranges_ypos = -0.2, +#' yproj_xpos = -10, +#' yproj_dodge = 20, +#' prob_text_size = 9, +#' binlimits_text_size = 6, +#' N_text_size = 5, +#' dist_position_scaler = 0.15)+ +#' scale_x_continuous(breaks = seq(0,350,50), +#' expand = expansion(add= c(0,0),mult=c(0,0)))+ +#' coord_cartesian(xlim = c(-30,355))+ +#' facet_grid(~Endpoint) +#'\dontrun{ +#' #Example 5 +#' gglogisticexpdist(data = effICGI |> filter(Endpoint=="ICGI"), +#' response = "response", +#' endpoint = "Endpoint", +#' DOSE = "DOSE", +#' exposure_metrics = c("AUC"), +#' exposure_metric_split = c("quartile"), +#' exposure_distribution ="distributions", +#' exposure_metric_soc_value = -99, +#' exposure_metric_plac_value = 0, +#' dist_position_scaler = 0.15)+ +#' facet_grid(Endpoint~expname+exptile,scales="free", +#' margins = "exptile") +#'} +#' @export +gglogisticexpdist <- function(data = effICGI, + response = "response", + endpoint = "Endpoint", + DOSE = "DOSE", + exposure_metrics = c("AUC","CMAX"), + exposure_metric_split = c("median","tertile","quartile","none"), + exposure_metric_soc_value = -99, + exposure_metric_plac_value = 0, + exposure_distribution = c("distributions","lineranges","none"), + dose_plac_value = "Placebo", + xlab = "Exposure Values", + ylab ="Probability of Response", + prob_text_size = 5, + prob_obs_bydose = TRUE, + N_text_size = 5, + binlimits_text_size = 5, + dist_position_scaler = 0.2, + dist_offset = 0, + lineranges_ypos = 0.2, + yproj = TRUE, + yproj_xpos = 0, + yproj_dodge = 0.2, + yaxis_position = c("left","right"), + facet_formula = NULL, + theme_certara = TRUE +) { + + responseinputvar <- response + endpointinputvar <- endpoint + DOSEinputvar <- DOSE + exposure_metric_split <- match.arg(exposure_metric_split, several.ok = FALSE) + exposure_distribution <- match.arg(exposure_distribution, several.ok = FALSE) + yaxis_position <- match.arg(yaxis_position, several.ok = FALSE) + + effICGI = expname = expvalue = DOSE2 = quant = values = plogis = Ncat = Ntot = xmed = percentage = exptile= keynumeric=NULL + intercept = medexp = prob = SE = N = ndensity = NULL + + data <- data |> + dplyr::mutate(none = "(all)") # needed when no metric are chosen + + data.long <- data |> + tidyr::gather(expname,expvalue,!!!exposure_metrics, factor_key = TRUE) |> + dplyr::group_by(expname,!!sym(endpointinputvar) ) + + if(exposure_metric_split=="none") { + data.long <- data.long |> + dplyr::mutate(exptile = dplyr::case_when( + expvalue == exposure_metric_soc_value ~ NA, + expvalue == exposure_metric_plac_value ~ NA, + expvalue > exposure_metric_plac_value ~ expvalue)) + data.long$keynumeric<- - dist_position_scaler* as.numeric(forcats::fct_rev(as.factor(dplyr::pull(data.long[,DOSEinputvar])))) + dist_offset + xintercepts <- data.long|> + dplyr::group_by(expname,!!sym(endpointinputvar) )|> + dplyr::reframe(intercept = stats::quantile(expvalue[!expvalue %in% + c(exposure_metric_soc_value, exposure_metric_plac_value)], + c(0,1), na.rm=TRUE), + quant = c(0,1) ) + + } + if(exposure_metric_split=="quartile") { + data.long <- data.long |> + dplyr::mutate( + Q25 = stats::quantile(expvalue[!expvalue %in% c(exposure_metric_soc_value, + exposure_metric_plac_value)], 0.25, na.rm=TRUE), + Q50 = stats::quantile(expvalue[!expvalue %in% c(exposure_metric_soc_value, + exposure_metric_plac_value)], 0.50, na.rm=TRUE), + Q75 = stats::quantile(expvalue[!expvalue %in% c(exposure_metric_soc_value, + exposure_metric_plac_value)], 0.75, na.rm=TRUE)) |> + dplyr::mutate(exptile = dplyr::case_when( + expvalue == exposure_metric_soc_value ~ "SOC", + expvalue == exposure_metric_plac_value ~ "Placebo", + expvalue > exposure_metric_plac_value & + expvalue <= Q25 ~ "Q1", + expvalue > Q25 & expvalue <= Q50 ~ "Q2", + expvalue > Q50 & expvalue <= Q75 ~ "Q3", + expvalue > Q75 ~ "Q4")) + + data.long$keynumeric <- - dist_position_scaler*as.numeric(forcats::fct_rev(as.factor(dplyr::pull(data.long[,DOSEinputvar])))) + dist_offset + xintercepts <- data.long|> + dplyr::group_by(expname,!!sym(endpointinputvar) )|> + dplyr::reframe(intercept = stats::quantile(expvalue[!expvalue %in% + c(exposure_metric_soc_value, exposure_metric_plac_value)], + c(0,0.25,0.5,0.75,1), na.rm=TRUE), quant = c(0,0.25,0.5,0.75,1) ) + } + + if(exposure_metric_split=="tertile") { + data.long <- data.long |> + dplyr::mutate( + Q33 = stats::quantile(expvalue[!expvalue %in% c(exposure_metric_soc_value, + exposure_metric_plac_value)], 1/3, na.rm=TRUE), + Q66 = stats::quantile(expvalue[!expvalue %in% c(exposure_metric_soc_value, + exposure_metric_plac_value)], 2/3, na.rm=TRUE)) |> + dplyr::mutate(exptile = dplyr::case_when( + expvalue == exposure_metric_soc_value ~ "SOC", + expvalue == exposure_metric_plac_value ~" Placebo", + expvalue > exposure_metric_plac_value & + expvalue <= Q33 ~ "T1", + expvalue > Q33 & expvalue <= Q66 ~ "T2", + expvalue > Q66 ~ "T3")) + data.long$keynumeric <- - dist_position_scaler*as.numeric(forcats::fct_rev(as.factor(dplyr::pull(data.long[,DOSEinputvar])))) + dist_offset + xintercepts <- data.long|> + dplyr::group_by(expname,!!sym(endpointinputvar) )|> + dplyr::reframe(intercept = stats::quantile(expvalue[!expvalue %in% + c(exposure_metric_soc_value, exposure_metric_plac_value)], + c(0,1/3,2/3,1), na.rm=TRUE), quant = c(0,1/3,2/3,1)) + + } + if(exposure_metric_split=="median") { + data.long <- data.long |> + dplyr::mutate( + Q50 = stats::quantile(expvalue[!expvalue %in% c(exposure_metric_soc_value, + exposure_metric_plac_value)], 0.5, na.rm=TRUE)) |> + dplyr::mutate(exptile = dplyr::case_when( + expvalue == exposure_metric_soc_value ~"SOC", + expvalue == exposure_metric_plac_value ~"Placebo", + expvalue > 0 & expvalue <= Q50 ~ "M1", + expvalue > Q50 ~ "M2")) + data.long$keynumeric <- - dist_position_scaler*as.numeric(forcats::fct_rev(as.factor(dplyr::pull(data.long[,DOSEinputvar])))) + dist_offset + xintercepts <- data.long|> + dplyr::group_by(expname,!!sym(endpointinputvar) )|> + dplyr::reframe(intercept = stats::quantile(expvalue[!expvalue %in% + c(exposure_metric_soc_value, exposure_metric_plac_value)], + c(0,0.5,1), na.rm=TRUE), quant = c(0,0.5,1) ) + + } + data.long$exptile2 <- data.long$exptile + data.long[,"DOSE2"] <- data.long[,DOSEinputvar] + + data.long.summaries.dose <- data.long |> + dplyr::group_by(!!sym(endpointinputvar),expname,!!sym(DOSEinputvar),DOSE2)|> + dplyr::reframe( + summary_df(expvalue,!!sym(responseinputvar))) |> + tidyr::pivot_wider(names_from= quant,values_from = values,names_glue = "quant_{100*quant}") + + + loopvariables <- unique(c(endpointinputvar,"expname")) + data.long.summaries.dose <- tidyr::unite(data.long.summaries.dose,"loopvariable", !!!loopvariables, remove = FALSE) + data.long <- tidyr::unite(data.long,"loopvariable", !!!loopvariables, remove = FALSE) + + logisticfit_by_endpoint <- list() + predict_by_endpoint_expname <- list() + predict_by_endpoint_expname_dose <- list() + predict_by_endpoint_expname_dose2 <- list() + + for (i in unique(data.long[,"loopvariable"]) |> + dplyr::pull() |> + as.character() ) { + logisticregdata<- data.long |> + dplyr::filter(.data[["loopvariable"]] ==i) + d <- rms::datadist(logisticregdata[, c(endpointinputvar,responseinputvar,DOSEinputvar,"expname","expvalue","exptile")]) + options(datadist= d) + logisticfit_by_endpoint_fit <- eval(bquote( rms::lrm( as.formula(paste(responseinputvar,"~","expvalue")) , data=logisticregdata,x=TRUE,y=TRUE) )) + logisticfit_by_endpoint[[i]] <- logisticfit_by_endpoint_fit + + data.long.summaries.dose.loop <- data.long.summaries.dose |> + dplyr::filter(.data[["loopvariable"]] ==i) + pred10exp <- as.data.frame(rms::Predict(logisticfit_by_endpoint_fit,fun=plogis, + expvalue= data.long.summaries.dose.loop$quant_10)) + names(pred10exp)<- c("quant_10" , "ymid10", "ylow10", "yup10") + pred10exp$loopvariable <- i + pred90exp <- as.data.frame(rms::Predict(logisticfit_by_endpoint_fit,fun=plogis, + expvalue= data.long.summaries.dose.loop$quant_90)) + names(pred90exp)<- c("quant_90" , "ymid90", "ylow90", "yup90") + pred90exp$loopvariable <- i + pred25exp <- as.data.frame(rms::Predict(logisticfit_by_endpoint_fit,fun=plogis, + expvalue= data.long.summaries.dose.loop$quant_25)) + names(pred25exp)<- c("quant_25" , "ymid25", "ylow25", "yup25") + pred25exp$loopvariable <- i + pred75exp <- as.data.frame(rms::Predict(logisticfit_by_endpoint_fit,fun=plogis, + expvalue= data.long.summaries.dose.loop$quant_75)) + names(pred75exp)<- c("quant_75" , "ymid75", "ylow75", "yup75") + pred75exp$loopvariable <- i + pred50exp<- as.data.frame (rms::Predict(logisticfit_by_endpoint_fit,fun=plogis, + expvalue=data.long.summaries.dose.loop$medexp)) + names(pred50exp)<- c("medexp" , "ymid50", "ylow50", "yup50") + pred50exp$loopvariable <- i + + data.long.summaries.dose.loop<- dplyr::left_join(data.long.summaries.dose.loop,pred10exp) + data.long.summaries.dose.loop<- dplyr::left_join(data.long.summaries.dose.loop,pred90exp) + data.long.summaries.dose.loop<- dplyr::left_join(data.long.summaries.dose.loop,pred25exp) + data.long.summaries.dose.loop<- dplyr::left_join(data.long.summaries.dose.loop,pred75exp) + data.long.summaries.dose.loop<- dplyr::left_join(data.long.summaries.dose.loop,pred50exp) + predict_by_endpoint_expname[[i]] <- data.long.summaries.dose.loop + + predictionsbydose<- data.long.summaries.dose.loop |> + dplyr::group_by(!!sym(DOSEinputvar),!!sym(endpointinputvar),expname,DOSE2) |> + dplyr::do(as.data.frame(rms::Predict(logisticfit_by_endpoint_fit,fun=plogis, + expvalue=seq(.data$quant_10,.data$quant_90,0.1)))) + predict_by_endpoint_expname_dose[[i]] <- predictionsbydose + + predictionsbydose2<- data.long.summaries.dose.loop |> + dplyr::group_by(!!sym(DOSEinputvar),!!sym(endpointinputvar),expname,DOSE2) |> + dplyr::do(as.data.frame(rms::Predict(logisticfit_by_endpoint_fit,fun=plogis, + expvalue=seq(.data$quant_25,.data$quant_75,0.1)))) + predict_by_endpoint_expname_dose2[[i]] <- predictionsbydose2 + + } + predict_by_endpoint_expname <- data.table::rbindlist(predict_by_endpoint_expname) + predict_by_endpoint_expname_dose <- data.table::rbindlist(predict_by_endpoint_expname_dose) + predict_by_endpoint_expname_dose2 <- data.table::rbindlist(predict_by_endpoint_expname_dose2) + + + data.long.summaries.exposure <- data.long |> + dplyr::ungroup()|> + dplyr::group_by(!!sym(endpointinputvar),expname,exptile)|> + dplyr::reframe( + summary_df(expvalue,!!sym(responseinputvar))) |> + tidyr::pivot_wider(names_from= quant,values_from =values,names_glue = "quant_{100*quant}") + + percentineachbreakcategory <- data.long |> + dplyr::group_by(!!sym(endpointinputvar),expname,!!sym(DOSEinputvar),keynumeric,DOSE2)|> + dplyr::select(!!sym(endpointinputvar),expname,!!sym(DOSEinputvar),keynumeric,expvalue,exptile,DOSE2)|> + dplyr::group_by(!!sym(DOSEinputvar),keynumeric,expname,DOSE2) |> + dplyr::mutate(Ntot = dplyr::n())|> + dplyr::group_by(!!sym(DOSEinputvar),expname,exptile,keynumeric,DOSE2) |> + dplyr::mutate(Ncat = dplyr::n(),xmed=median(expvalue))|> + dplyr::mutate(percentage=Ncat/Ntot)|> + dplyr::distinct(!!sym(DOSEinputvar),xmed,exptile,expname,percentage,keynumeric,DOSE2)|> + dplyr::arrange(!!sym(DOSEinputvar)) + + facet_formula <- if (is.null(facet_formula) ) stats::as.formula( paste(endpointinputvar,"~","expname")) else + stats::as.formula(facet_formula) + #facet_formula <- Endpoint ~ expname + + p1 <- ggplot2::ggplot(data.long, + ggplot2::aes_string("expvalue", responseinputvar))+ + ggplot2::facet_grid(facet_formula, scales = "free")+ + #facet_nested(Endpoint~expname+DOSE2,scales="free",margins = "DOSE2")+ + ggplot2::geom_point(ggplot2::aes_string(col = DOSEinputvar), + alpha = 0.2, position = ggplot2::position_jitter(width = 0 , height = 0.05))+ + ggplot2::geom_hline(yintercept = c(0,1))+ + ggplot2::geom_vline(data = xintercepts, ggplot2::aes(xintercept = intercept), color = "gray70" )+ + ggplot2::geom_ribbon(data = data.long |> dplyr::mutate( DOSEinputvar := NULL, DOSE2 = NULL, exptile = NULL), + stat="smooth", + method = "glm", method.args = list(family = "binomial"), + color="transparent",linetype=0, alpha = 0.5, + ggplot2::aes(fill = "Logistic Fit 95% CI"))+ + ggplot2::geom_line(data = data.long |> dplyr::mutate( DOSEinputvar := NULL, DOSE2 = NULL, exptile = NULL), + stat="smooth", + method = "glm", method.args = list(family = "binomial"), + color="black", alpha = 0.5, + ggplot2::aes(linetype = "Logistic Fit 95% CI"))+ + ggplot2::geom_line(data = predict_by_endpoint_expname_dose, + ggplot2::aes_string(y = "yhat", col = DOSEinputvar), + alpha = 0.4, size = 2)+ + ggplot2::geom_line(data = predict_by_endpoint_expname_dose2, + ggplot2::aes_string(y = "yhat", col = DOSEinputvar), + alpha = 0.4, size = 2.5)+ + ggplot2::geom_point(data = predict_by_endpoint_expname, + ggplot2::aes_string(x = "medexp", y = "ymid50", col = DOSEinputvar), + alpha = 0.4, size = 5) + + + if(exposure_distribution=="lineranges") { + lineranges_ypos <- as.character(lineranges_ypos) + p1l <- p1 + + ggplot2::geom_linerange(data = data.long.summaries.dose, size = 2, alpha = 0.4, + ggplot2::aes_string(xmin = "quant_10", xmax = "quant_90",y = lineranges_ypos, + col = DOSEinputvar, group = DOSEinputvar), + position = ggstance::position_dodgev(height = 0.15),inherit.aes = FALSE)+ + ggplot2::geom_linerange(data = data.long.summaries.dose, size = 2.5, alpha = 0.4, + ggplot2::aes_string(xmin = "quant_25", xmax= "quant_75", y = lineranges_ypos, + col = DOSEinputvar, group = DOSEinputvar), + position = ggstance::position_dodgev(height = 0.15), inherit.aes = FALSE)+ + ggplot2::geom_point(data=data.long.summaries.dose, size = 5, alpha = 0.2, + ggplot2::aes_string(x="medexp",y = lineranges_ypos, + col = DOSEinputvar), + position = ggstance::position_dodgev(height = 0.15)) + } + if(exposure_distribution!="lineranges") { + p1l <- p1 + } + + p2e <- p1l + + ggplot2::geom_pointrange(data = data.long.summaries.exposure, size = 1, + ggplot2::aes(shape = "Observed probability by exposure split", + x = medexp, y = prob, ymin = prob+1.959*SE, ymax=prob-1.959*SE), + alpha = 0.5) + if(prob_obs_bydose){ + data.long.summaries.dose.plot <- data.long.summaries.dose + data.long.summaries.dose.plot[data.long.summaries.dose.plot[,DOSEinputvar]==dose_plac_value,"N"] <- NA + data.long.summaries.dose.plot[data.long.summaries.dose.plot[,DOSEinputvar]==dose_plac_value,"Ntot"] <- NA + data.long.summaries.dose.plot[data.long.summaries.dose.plot[,DOSEinputvar]==dose_plac_value,"prob"] <- NA + + p2d <- p2e + + ggplot2::geom_pointrange(data = data.long.summaries.dose.plot, alpha = 0.5, size = 1, + ggplot2::aes(x = medexp, y = prob, col = !!sym(DOSEinputvar), + ymin = prob+1.959*SE, ymax=prob-1.959*SE, + shape = "Observed probability by dose split"), + show.legend = FALSE) + + ggplot2::geom_text(data=data.long.summaries.dose.plot, vjust = 1, size = prob_text_size, show.legend = FALSE, + ggplot2::aes(x = medexp, y = prob, col = !!sym(DOSEinputvar), + label = paste( + paste("\n",100*round(prob,2),"%",sep=""), + "\n",N,"/",Ntot,sep="") + )) + + } + if(!prob_obs_bydose){ + p2d <- p2e + } + p2 <- p2d + + ggplot2::geom_text(data=data.long.summaries.exposure, vjust = 0, size = prob_text_size, show.legend = FALSE, + ggplot2::aes(x = medexp, y = prob, label = paste(100*round(prob,2),"%","\n",sep="")))+ + ggplot2::geom_text(data = xintercepts, ggplot2::aes(label=round(intercept,1), x = intercept, y = 0) , + vjust = 1, size = binlimits_text_size,color = "gray70")+ + ggplot2::geom_text(data = data.long.summaries.exposure, y = 0, vjust = 0, size = N_text_size, + ggplot2::aes(x = as.double(as.character(medexp)), label=paste(N,"/",Ntot,sep=""))) + + if(exposure_distribution=="distributions") { + data.long.ridges <- data.long + data.long.ridges[data.long.ridges[,DOSEinputvar]==dose_plac_value,"expvalue"] <- NA + p2d <- p2 + + ggridges::geom_density_ridges(data = data.long.ridges, + ggplot2::aes(x = expvalue, y = keynumeric, + group = !!rlang::sym(DOSEinputvar), + col = !!rlang::sym(DOSEinputvar), + height = ggplot2::after_stat(ndensity)), + rel_min_height = 0.05,alpha = 0.1, scale = 0.9, + quantile_lines = TRUE, quantiles = c(0.1,0.25, 0.5, 0.75,0.9))+ + ggplot2::geom_label(data = percentineachbreakcategory, + ggplot2::aes(color = !!rlang::sym(DOSEinputvar), y = keynumeric, x= xmed, label = round(100*percentage,0) ), + alpha = 0.5, show.legend = FALSE) + } + if(exposure_distribution!="distributions") { + p2d <- p2 + } + + if(yproj) { + yproj_xpos <- as.character(yproj_xpos) + + p2df <- p2d + + ggplot2::geom_linerange(data = predict_by_endpoint_expname, alpha = 0.4, size = 2, + ggplot2::aes_string(x = yproj_xpos, ymin = "ymid10", ymax = "ymid90", col = DOSEinputvar, group = DOSEinputvar), + position = ggplot2::position_dodge(width = yproj_dodge), inherit.aes = FALSE)+ + ggplot2::geom_linerange(data = predict_by_endpoint_expname, alpha = 0.4, size = 2.5, + ggplot2::aes_string(x = yproj_xpos, ymin = "ymid25", ymax = "ymid75", col= DOSEinputvar, group= DOSEinputvar), + position = ggplot2::position_dodge(width = yproj_dodge), inherit.aes = FALSE)+ + ggplot2::geom_point(data=predict_by_endpoint_expname, alpha = 0.4, size = 3, + ggplot2::aes_string(x = yproj_xpos, y = "ymid50", col = DOSEinputvar), + position = ggplot2::position_dodge(width = yproj_dodge), inherit.aes = FALSE) + } + if(!yproj) { + p2df <- p2d + } + if(exposure_distribution =="distributions"){ + p2df2 <- p2df + + ggplot2::scale_y_continuous(position = yaxis_position, + breaks =c(unique(data.long$keynumeric), + c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1) ), + labels= c(as.vector(unique(data.long$DOSE)), + c("0","0.1","0.2","0.3","0.4","0.5","0.6","0.7","0.8","0.9","1") ), + expand = ggplot2::expansion(mult=c(0.01,0.01), add =c(0, 0))) + } + if(exposure_distribution =="lineranges"){ + p2df2 <- p2df + + ggplot2::scale_y_continuous(position = yaxis_position, + breaks =c( + c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1) ), + labels= c( + c("0","0.1","0.2","0.3","0.4","0.5","0.6","0.7","0.8","0.9","1") ), + expand = ggplot2::expansion(mult=c(0.01,0.01), add =c(0, 0))) + } + p2df2 + + ggplot2::labs(fill="", linetype="", shape="", x = xlab, y = ylab) + + ggplot2::theme_bw(base_size = 18)+ + ggplot2::theme(legend.position = "top",strip.placement = "outside", + axis.title.y = ggplot2::element_blank())+ + ggplot2::scale_colour_manual(values = c( "#4682AC","#FDBB2F","#EE3124" ,"#336343","#7059a6", "#803333"), + drop=FALSE,na.value = "grey50")+ + ggplot2::scale_fill_manual( values = c("gray80", + "#4682AC","#FDBB2F","#EE3124" ,"#336343","#7059a6", "#803333"), + drop=FALSE,na.value = "grey50")+ + ggplot2::theme(strip.background = ggplot2::element_rect(fill="#475c6b"), + strip.text = ggplot2::element_text(face = "bold",color = "white")) + +} + +library(ggquickeda) +effICGI <- logistic_data |> +dplyr::filter(!is.na(ICGI))|> +dplyr::filter(!is.na(AUC)) +effICGI$DOSE <- factor(effICGI$DOSE, + levels=c("0", "600", "1200","1800","2400"), + labels=c("Placebo", "600 mg", "1200 mg","1800 mg","2400 mg")) +effICGI$STUDY <- factor(effICGI$STUDY) +effICGI$ICGI2 <- effICGI$ICGI +effICGI <- tidyr::gather(effICGI,Endpoint,response,ICGI,ICGI2) + + +a <- gglogisticexpdist(data = effICGI, # long format filter to Endpoint of choice + response = "response", + endpoint = "Endpoint", + DOSE = "DOSE",yproj_dodge = 36, + exposure_metrics = c("AUC"), + exposure_metric_split = c("quartile"), + exposure_distribution ="lineranges", + exposure_metric_soc_value = -99, + exposure_metric_plac_value = 0) + + facet_grid(Endpoint~expname,switch = "both") +b <- gglogisticexpdist(data = effICGI, # long format filter to Endpoint of choice + response = "response", + endpoint = "Endpoint", + DOSE = "DOSE",yproj_dodge = 2, + exposure_metrics = c("CMAX"), + exposure_metric_split = c("quartile"), + exposure_distribution ="lineranges", + exposure_metric_soc_value = -99, + exposure_metric_plac_value = 0, + yaxis_position = "right")+ + facet_grid(Endpoint~expname,switch = "x")+ + theme(strip.text.y.right = element_blank(), + strip.background.y = element_blank()) +library(patchwork) +(a | b ) + + plot_layout(guides = "collect")& + theme(legend.position = "top") diff --git a/R/logisticdata.R b/R/logisticdata.R new file mode 100644 index 0000000..9890b70 --- /dev/null +++ b/R/logisticdata.R @@ -0,0 +1,24 @@ +#' Simulated Exposure Response Data +#' +#' A dataset containing data suitable for logistic regression +#' +#' @format A data frame with 600 rows and 10 variables +#' \describe{ +#' \item{STUDY}{Study identifier} +#' \item{ID}{Subject Identifier} +#' \item{DOSE}{Dose, in mg} +#' \item{GBDS}{Dose, in alternative salt} +#' \item{SEX}{Sex of the subject} +#' \item{AGE}{age of the subject, in years} +#' \item{WT}{weight of the subject, in kg} +#' \item{RACE}{Race of the subject} +#' \item{CRCL}{Creatinine clearance} +#' \item{BRLS}{RLS score} +#' \item{PRLS}{RLS score} +#' \item{AUC}{Area under the curve exposure} +#' \item{CMAX}{Maximun concentration exposure} +#' \item{ICGI}{response 0/1} +#' \item{ICGI7}{response 1 to 7} +#' } +#' @source inspired from a real data submission +"logistic_data" diff --git a/man/gglogisticexpdist.Rd b/man/gglogisticexpdist.Rd new file mode 100644 index 0000000..c5e34d2 --- /dev/null +++ b/man/gglogisticexpdist.Rd @@ -0,0 +1,176 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gglogisticexpdist.R +\name{gglogisticexpdist} +\alias{gglogisticexpdist} +\title{gglogisticexpdist} +\usage{ +gglogisticexpdist( + data = effICGI, + response = "response", + endpoint = "Endpoint", + DOSE = "DOSE", + exposure_metrics = c("AUC", "CMAX"), + exposure_metric_split = c("median", "tertile", "quartile", "none"), + exposure_metric_soc_value = -99, + exposure_metric_plac_value = 0, + exposure_distribution = c("distributions", "lineranges", "none"), + dose_plac_value = "Placebo", + xlab = "Exposure Values", + ylab = "Probability of Response", + prob_text_size = 5, + prob_obs_bydose = TRUE, + N_text_size = 5, + binlimits_text_size = 5, + dist_position_scaler = 0.2, + dist_offset = 0, + lineranges_ypos = 0.2, + yproj = TRUE, + yproj_xpos = 0, + yproj_dodge = 0.2, + yaxis_position = c("left", "right"), + facet_formula = NULL, + theme_certara = TRUE +) +} +\arguments{ +\item{data}{Data to use with multiple endpoints stacked into Endpoint(endpoint name), response 0/1} + +\item{response}{name of the column holding the valuesresponse 0/1} + +\item{endpoint}{name of the column holding the name/key of the endpoint default to `Endpoint`} + +\item{DOSE}{name of the column holding the DOSE values default to `DOSE`} + +\item{exposure_metrics}{name(s) of the column(s) to be stacked into `expname` `exptile` and split into `exposure_metric_split`} + +\item{exposure_metric_split}{one of "median", "tertile", "quartile", "none"} + +\item{exposure_metric_soc_value}{special exposure code for standard of care default -99} + +\item{exposure_metric_plac_value}{special exposure code for placebo default 0} + +\item{exposure_distribution}{one of distributions, lineranges or none} + +\item{dose_plac_value}{string identifying placebo in DOSE column} + +\item{xlab}{text to be used as x axis label} + +\item{ylab}{text to be used as y axis label} + +\item{prob_text_size}{probability text size default to 5} + +\item{prob_obs_bydose}{TRUE/FALSE} + +\item{N_text_size}{N responders/Ntotal by exposure bin text size default to 5} + +\item{binlimits_text_size}{5 binlimits text size} + +\item{dist_position_scaler}{space occupied by the distribution default to 0.2} + +\item{dist_offset}{offset where the distribution position starts 0} + +\item{lineranges_ypos}{where to put the lineranges 0.2} + +\item{yproj}{project the probabilities on y axis TRUE/FALSE} + +\item{yproj_xpos}{y projection x position 0} + +\item{yproj_dodge}{y projection dodge value 0.2} + +\item{yaxis_position}{where to put y axis "left" or "right"} + +\item{facet_formula}{facet formula to be use otherwise endpoint ~ expname} + +\item{theme_certara}{apply certara colors and format for strips and default colour/fill} +} +\description{ +Produces a logistic fit plot with a facettable exposures/quantiles/distributions in ggplot2 +} +\examples{ +# Example 1 +library(ggplot2) +effICGI <- logistic_data |> +dplyr::filter(!is.na(ICGI))|> +dplyr::filter(!is.na(AUC)) +effICGI$DOSE <- factor(effICGI$DOSE, + levels=c("0", "600", "1200","1800","2400"), + labels=c("Placebo", "600 mg", "1200 mg","1800 mg","2400 mg")) +effICGI$STUDY <- factor(effICGI$STUDY) +effICGI$ICGI2 <- effICGI$ICGI +effICGI <- tidyr::gather(effICGI,Endpoint,response,ICGI,ICGI2) +gglogisticexpdist(data = effICGI, # long format filter to Endpoint of choice + response = "response", + endpoint = "Endpoint", + exposure_metrics = c("AUC","CMAX"), + exposure_metric_split = c("quartile"), + exposure_metric_soc_value = -99, + exposure_metric_plac_value = 0, + exposure_distribution ="distributions") + +# Example 2 +gglogisticexpdist(data = effICGI, # long format filter to Endpoint of choice +response = "response", +endpoint = "Endpoint", +DOSE = "DOSE", +exposure_metrics = c("AUC"), +exposure_metric_split = c("quartile"), +exposure_distribution ="lineranges", +exposure_metric_soc_value = -99, +exposure_metric_plac_value = 0, +lineranges_ypos = -0.2, +prob_obs_bydose = TRUE, +yproj_xpos = -10, +yproj_dodge = 10, +dist_position_scaler = 0.15) +# Example 3 +library(ggh4x) +gglogisticexpdist(data = effICGI |> + dplyr::filter(Endpoint=="ICGI"), + response = "response", + endpoint = "Endpoint", + DOSE = "DOSE", + exposure_metrics = c("AUC"), + exposure_metric_split = c("quartile"), + exposure_distribution ="distributions", + exposure_metric_soc_value = -99, + exposure_metric_plac_value = 0, + dist_position_scaler = 0.15)+ + facet_grid2(Endpoint~expname+DOSE2,scales="free", + margins = "DOSE2",strip = strip_nested()) +# Example 4 +gglogisticexpdist(data = effICGI, + response = "response", + endpoint = "Endpoint", + DOSE = "DOSE", + exposure_metrics = c("AUC"), + exposure_metric_split = c("quartile"), + exposure_distribution ="distributions", + exposure_metric_soc_value = -99, + exposure_metric_plac_value = 0, + lineranges_ypos = -0.2, + yproj_xpos = -10, + yproj_dodge = 20, + prob_text_size = 9, + binlimits_text_size = 6, + N_text_size = 5, + dist_position_scaler = 0.15)+ + ggplot2::scale_x_continuous(breaks = seq(0,350,50), + expand = ggplot2::expansion(add= c(0,0),mult=c(0,0)))+ + ggplot2::coord_cartesian(xlim = c(-30,355))+ + ggplot2::facet_grid(~Endpoint) +\dontrun{ +#Example 5 +gglogisticexpdist(data = effICGI |> filter(Endpoint=="ICGI"), + response = "response", + endpoint = "Endpoint", + DOSE = "DOSE", + exposure_metrics = c("AUC"), + exposure_metric_split = c("quartile"), + exposure_distribution ="distributions", + exposure_metric_soc_value = -99, + exposure_metric_plac_value = 0, + dist_position_scaler = 0.15)+ + facet_grid(Endpoint~expname+exptile,scales="free", + margins = "exptile") +} +} diff --git a/man/logistic_data.Rd b/man/logistic_data.Rd new file mode 100644 index 0000000..93d2e47 --- /dev/null +++ b/man/logistic_data.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/logisticdata.R +\docType{data} +\name{logistic_data} +\alias{logistic_data} +\title{Simulated Exposure Response Data} +\format{ +A data frame with 600 rows and 10 variables +\describe{ + \item{STUDY}{Study identifier} + \item{ID}{Subject Identifier} + \item{DOSE}{Dose, in mg} + \item{GBDS}{Dose, in alternative salt} + \item{SEX}{Sex of the subject} + \item{AGE}{age of the subject, in years} + \item{WT}{weight of the subject, in kg} + \item{RACE}{Race of the subject} + \item{CRCL}{Creatinine clearance} + \item{BRLS}{RLS score} + \item{PRLS}{RLS score} + \item{AUC}{Area under the curve exposure} + \item{CMAX}{Maximun concentration exposure} + \item{ICGI}{response 0/1} + \item{ICGI7}{response 1 to 7} +} +} +\source{ +inspired from a real data submission +} +\usage{ +logistic_data +} +\description{ +A dataset containing data suitable for logistic regression +} +\keyword{datasets}