diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5b6a065 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata diff --git a/1_Data/FPNMA_means.rds b/1_Data/FPNMA_means.rds new file mode 100644 index 0000000..fb6ddf2 Binary files /dev/null and b/1_Data/FPNMA_means.rds differ diff --git a/1_Data/ID6184_RCC_model inputs FAD version [ACIC redacted, cPAS redacted and CIC redacted].xlsm b/1_Data/ID6184_RCC_model inputs FAD version [ACIC redacted, cPAS redacted and CIC redacted].xlsm new file mode 100644 index 0000000..7a8c5d8 Binary files /dev/null and b/1_Data/ID6184_RCC_model inputs FAD version [ACIC redacted, cPAS redacted and CIC redacted].xlsm differ diff --git a/1_Data/IPD_R_input_noACIC.xlsx b/1_Data/IPD_R_input_noACIC.xlsx new file mode 100644 index 0000000..219a9ac Binary files /dev/null and b/1_Data/IPD_R_input_noACIC.xlsx differ diff --git a/1_Data/PH_NMA_CODA.rds b/1_Data/PH_NMA_CODA.rds new file mode 100644 index 0000000..f0e8ddf Binary files /dev/null and b/1_Data/PH_NMA_CODA.rds differ diff --git a/1_Data/README.Rmd b/1_Data/README.Rmd new file mode 100644 index 0000000..316903c --- /dev/null +++ b/1_Data/README.Rmd @@ -0,0 +1,35 @@ +--- +title: "Data folder" +references: library.bib +bibliography: library.bib +output: + word_document: + toc: yes + toc_depth: '3' + html_document: + df_print: paged + toc: yes + toc_depth: 3 + number_sections: yes +csl: elsvan.csl +--- + +# Description + +This is the readme for the data folder. Explanation and sources for all data used will be provided here. This is an example of how a README will be structured for the generic model in Phase II of this pilot. + +The file library.bib is a LaTex file containing the reference library. This can be updated directly from the reference manager Zotero (it can export .bib files). Use the visual editor in Rstudio (v1.2+) to easily add references using the drop down menu. There is an example in there, taken from Google scholar results (click ref and then bibtex). + +# Some example text + +The NICE DSU published a further technical support document, expanding the set of survival analysis approaches recommended by NICE [@tsd21]. + +## gpop adjustment + +The population norms for HRQoL are adjusted for declination in the general population, using the 2011 article by Ara & Brazier [@ara2011using]. This publication does not report the variance-covariance matrix for the regression analysis, so it is impossible to vary these adjustments in the probabilistic setting as the correlation structure between parameters cannot be taken into account. Varying these separately could lead to spurious results or inference due to non-linear interactions between the parameters (some of which are transformed). + +### Something even more specific + +Something interesting [@brazier2011; @craig2012; @goryakin2015]. + +# References diff --git a/1_Data/README.docx b/1_Data/README.docx new file mode 100644 index 0000000..5859c1a Binary files /dev/null and b/1_Data/README.docx differ diff --git a/1_Data/Survival_analysis_noTTDorTTPorPPS[NoACIC].rds b/1_Data/Survival_analysis_noTTDorTTPorPPS[NoACIC].rds new file mode 100644 index 0000000..1a45010 Binary files /dev/null and b/1_Data/Survival_analysis_noTTDorTTPorPPS[NoACIC].rds differ diff --git a/1_Data/elsnumalph.csl b/1_Data/elsnumalph.csl new file mode 100644 index 0000000..8d35a0a --- /dev/null +++ b/1_Data/elsnumalph.csl @@ -0,0 +1,134 @@ + + diff --git a/1_Data/elsvan.csl b/1_Data/elsvan.csl new file mode 100644 index 0000000..be85b6d --- /dev/null +++ b/1_Data/elsvan.csl @@ -0,0 +1,163 @@ + + diff --git a/1_Data/library.bib b/1_Data/library.bib new file mode 100644 index 0000000..ad6984c --- /dev/null +++ b/1_Data/library.bib @@ -0,0 +1,46 @@ +@article{ara2011using, + title={Using health state utility values from the general population to approximate baselines in decision analytic models when condition-specific data are not available}, + author={Ara, Roberta and Brazier, John E}, + journal={Value in Health}, + volume={14}, + number={4}, + pages={539--545}, + year={2011}, + publisher={Elsevier} +} + +@article{tsd21, + title = {NICE DSU Technical Support Document 21: Flexible Methods for Survival Analysis}, + author = {Rutherford, Mark J and Lambert, Paul C and Sweeting, Michael J and Pennington, Becky and Crowther, Michael J and Abrams, Keith R and Latimer, Nicholas R}, + pages = {97}, + note = {Citation Key: tsd21}, + langid = {en} +} + +@article{brazier2011, + title = {NICE DSU Technical Support Document 11: Alternatives to EQ-5D for Generating Health State Utility Values}, + author = {Brazier, John and Rowen, Donna}, + year = {2011}, + date = {2011}, + url = {http://www.nicedsu.org.uk/TSD11%20Alternatives%20to%20EQ-5D_final.pdf} +} + +@article{craig2012, + title = {Using natural experiments to evaluate population health interventions: new Medical Research Council guidance}, + author = {Craig, Peter and Cooper, Cyrus and Gunnell, David and Haw, Sally and Lawson, Kenny and Macintyre, Sally and Ogilvie, David and Petticrew, Mark and Reeves, Barney and Sutton, Matt and others, }, + year = {2012}, + date = {2012}, + journal = {Journal of epidemiology and community health}, + pages = {jech{\textendash}2011} +} + +@article{goryakin2015, + title = {The impact of economic, political and social globalization on overweight and obesity in the 56 low and middle income countries}, + author = {Goryakin, Yevgeniy and Lobstein, Tim and James, W Philip T and Suhrcke, Marc}, + year = {2015}, + date = {2015}, + journal = {Social Science & Medicine}, + pages = {67{\textendash}76}, + volume = {133}, + note = {Publisher: Elsevier} +} diff --git a/2_Scripts/Model_Structure.R b/2_Scripts/Model_Structure.R new file mode 100644 index 0000000..c2bb477 --- /dev/null +++ b/2_Scripts/Model_Structure.R @@ -0,0 +1,2090 @@ +#### 1. Installation ########### +#### This code has been created using R version 4.3.1 +#### All packages used by this model are provided here + +#### Comment out the below section which installs the relevant packages after the first run of the model +# install.packages("shiny", quiet = TRUE) ### the quiet argument is used to avoid warnings appearing in the console (useful for later conversion to web app) +# install.packages("gtools", quiet = TRUE) +# install.packages("openxlsx", quiet = TRUE) +# install.packages("flexsurv", quiet = TRUE) +# install.packages("tidyverse", quiet = TRUE) +# install.packages("data.table", quiet = TRUE) +# install.packages("heemod", quiet = TRUE) +# install.packages("logOfGamma", quiet = TRUE) +# install.packages("ggplot2", quiet = TRUE) +# install.packages("survminer", quiet = TRUE) +# install.packages("officer", quiet = TRUE) +# install.packages("officedown", quiet = TRUE) +# install.packages("magrittr", quiet = TRUE) +# install.packages("Hmisc", quiet = TRUE) +# install.packages("future.apply", quiet = TRUE) +# install.packages("crosstable", quiet = TRUE) +# install.packages("flextable", quiet = TRUE) +# install.packages("stringr", quiet = TRUE) +# install.packages("BCEA", quiet = TRUE) +# install.packages("collapse", quiet = TRUE) +# install.packages("scales", quiet = TRUE) +# install.packages("Matrix", quiet = TRUE) +# install.packages("dplyr", quiet = TRUE) +# install.packages("progressr", quiet = TRUE) +# install.packages("microbenchmark", quiet = TRUE) + +### Loading libraries + +#### This section needs to be run every time and calls each package from the library +library(shiny, quiet = TRUE) +library(gtools, quiet = TRUE) +library(openxlsx, quiet = TRUE) +library(flexsurv, quiet = TRUE) +library(tidyverse, quiet = TRUE) +library(data.table, quiet = TRUE) +library(heemod, quiet = TRUE) +library(logOfGamma, quiet = TRUE) +library(ggplot2, quiet = TRUE) +library(survminer, quiet = TRUE) +library(officer, quiet = TRUE) +library(officedown, quiet = TRUE) +library(magrittr, quiet = TRUE) +library(Hmisc, quiet = TRUE) +library(future.apply, quiet = TRUE) +library(crosstable, quiet = TRUE) +library(flextable, quiet = TRUE) +library(stringr, quiet = TRUE) +library(BCEA, quiet = TRUE) +library(collapse, quiet = TRUE) +library(scales, quiet = TRUE) +library(Matrix, quiet = TRUE) +library(dplyr, quiet = TRUE) +library(progressr, quiet = TRUE) +library(microbenchmark, quiet = TRUE) + + +# Multi-core processing: +# +# Instructions. +# +# This model is highly RAM intensive. You need a lot of RAM on your computer +# to run this model due to the large amount of very large matrix multiplications +# (up to approximately 15,000 discrete health states in the model). Therefore, +# in order to efficiently run the model, it is a balancing act between RAM +# usage and CPU usage. +# +# Some rough guidance is: +# +# - If you have 8GB of RAM on your computer, you can run this model with 2 cores only +# but it may even be faster to run in series if you have other things open on your +# computer at the same time. Therefore, please set keep_free_cores to NA and run +# the model in series. This is because when the RAM on your computer runs out +# your computer will use the hard-disk instead which is extremely slow. +# - If you have 16GB of RAM on your computer, parallel should be a lot faster. +# On my laptop (I7 8th gen, 16GB RAM, running Linux for low RAM usage) I can +# run with 5 cores whilst using about 12GB of RAM running this model. +# - if you have 24GB or 32GB of RAM, you should be able to run the model with 8 +# and up to around 14 cores before running out of RAM whilst running the model. +# - if you are using a HPC, you should be able to run this model with many cores +# due to the typically large amount of RAM available per core in a HPC +# +# +# IF YOU DO NOT WANT MULTICORE SET keep_free_cores TO NA +# +# +keep_free_cores <- 4 +if (any(is.na(keep_free_cores), keep_free_cores<0)) { + plan(sequential) +} else { + plan(multisession(workers = max(availableCores()-keep_free_cores,1))) +} + +# Other generic settings for the progress bar and units for table widths +handlers("progress") +options(crosstable_units="cm") + + +#### 2. Loading functions ########### + + +# This variable is used throughout the model to define whether to provide additional outputs useful for QC or not +# The model will take longer to run when this is set to TRUE +qc_mode <- FALSE + + + +# 2.1. Excel data extraction functions ----------------------------------------- + +#### These functions are used to extract parameters from the Excel input workbook for use in R +#### During Phase 2 a Shiny front-end will be added to the model which will allow an alternative mechanism to upload these types of inputs + +source(file.path("./3_Functions/excel/extract.R")) + +# 2.2. Treatment sequencing functions ---------------------------------------- + +#### Function: filter to active treatments and lines +##### Takes as an input the defined sequences, evaluation type and line to start the evaluation from +##### Other input is % receiving each subs therapy at each line dependent on previous treatments received +##### Reweights so that the % receiving each treatment sums to 100% within each arm / line being studied +##### Outputs a matrix that has the % receiving each possible combination + +source("./3_Functions/sequencing/sequences.R") + +# 2.3. Survival analysis functions --------------------------------------------- + +# Function: conduct survival analysis +##### by treatment, line, population and outcome fitted survival curves using Flexsurvreg (exp, Weibull, lognormal, loglog, Gompertz, gen gamma) +##### calculation of and adjustment for general population +##### adjustment for treatment effect waning + +source("./3_Functions/survival/Survival_functions.R") +source("./3_Functions/survival/other_cause_mortality.R") +source("./3_Functions/survival/treatment_effect_waning.R") + +# 2.4 Misc functions ---------------------------------------------------------- + +### these functions enable smoother data cleaning and manipulation + +source("./3_Functions/misc/other.R") +source("./3_Functions/misc/shift_and_pad.R") +source("./3_Functions/misc/cleaning.R") + +# 2.4.1 Functions imposing list structures ----------------------------------- + +source("./3_Functions/misc/nesting.R") +source("./3_Functions/misc/discounting.R") +source("./3_Functions/misc/qdirichlet.R") +source("./3_Functions/misc/plotting.R") +source("./3_Functions/misc/structure.R") + +# 2.4.2 Functions calculating HRs from FPNMA coefficients and other FPNMA manipulation ------ + +source("./3_Functions/misc/fpnma_fns.R") + + +# 2.5 Utility functions ------------------------------------------------------- + +source("./3_Functions/utility/age_related.R") +source("./3_Functions/costs_and_QALYs/utility_processing.R") + +# 2.6 AE functions -------------------------------------------------------- + +source("./3_Functions/adverse_events/AE_steps.R") + +# 2.7 Cost calculation functions -------------------------------------------- + +source("./3_Functions/costs_and_QALYs/cost_processing.R") + + +# 2.8 State transition modelling functions -------------------------------- + +source("./3_Functions/markov/markov.R") + +# 2.9 Patient flow functions ---------------------------------------------- + +source("./3_Functions/patient_flow/overarching.R") +source("./3_Functions/patient_flow/partitioned_survival.R") +source("./3_Functions/patient_flow/markov.R") +source("./3_Functions/patient_flow/drug_costs.R") +source("./3_Functions/patient_flow/hcru_costs.R") +source("./3_Functions/patient_flow/qalys.R") +source("./3_Functions/patient_flow/ae.R") + + + +# 2.10 Results processing functions --------------------------------------- + +source("./3_Functions/results/incremental_analysis.R") +source("./3_Functions/results/model_averaging.R") +source("./3_Functions/results/partitioned_survival.R") +source("./3_Functions/misc/severity_modifier.R") +source("./3_Functions/results/results_tables.R") +source("./3_Functions/psa/psa functions.R") + + + +# 2.11 Office software outputs -------------------------------------------- + +source("./3_Functions/reporting/word_document_output.R") + + + +# 3. Model inputs structure -------------------------------------------------- + +# Model inputs should be in a list called i. This list then contains all of the +# inputs for the model, NOT the parameters used to calculate the model. In effect, +# this is a place to store all model information BEFORE it gets boiled down to +# what's needed to run 1 model. +# +# using i allows subsetting by categorisation, which makes things a lot easier +# to find and avoids all long variable names +# +# the structure of i should be by category. There are the following +# categories: +# +# dd - dropdown inputs taken from Excel +# i - parameter inputs taken from Excel +# r_ tables taken from Excel +# List, id and lookup - lists defined and used within the code +# basic - basic inputs (time horizon, cycle length, discount rates, so on so forth) +# surv - survival analysis inputs including raw data +# sequences and seq - inputs and outputs related to the possible sequences of treatments +# cost - drug and hcru costs. All costs are here to keep things together (dosing is not cost) +# util and QALYs - utility and QALY inputs +# misc - misc inputs e.g. graph labelling +# + +#### 3.1 Loading input parameters ########### + +# This model allows two possible structures to be analysed: state transition with a user definable number of lines +# with health states based on time to discontinuation (drug costs) and progression status (quality of life and movement +# between lines) and PartSA with 3 health states (pre-progression, post-progression and death) + +# During Phase 1 of this pilot we use the model to evaluate the decision problem for a single therapy +# (cabo+nivo, defined as molecule 1) starting at 1st line +# During Phase 2 we will adapt this code to evaluate the cost-effectiveness of sequences starting at a user-defined line + +# Inputs to this model need to be downloaded from NICEdocs + +User_types <- c("Submitting company", "NICE", "EAG", "Committee", "NHSE", "Clinical expert", "Patient expert", "Non-intervention stakeholder", "Public") + +# The submitting company are able to see their own CIC and AIC data (marked up blue / yellow in reporting but not anything else: green marking +# green marked data has been either be replaced with 0 [PAS discounts, RWE IPD] or dummy data) +# NICE users will be able to see everything +# Other users will not be able to see any marked data, this is replaced with dummy data + +# The way raw data is fed into the model currently works as follows +# Define the path to where the data file lives using the select file functionality + +# The model then processes the file the user selected + +# There are a number of files which contain raw or intermediate inputs: +# 1. The Excel user interface - this contains information from company data and the UK RWE +# 2. The proportional hazards NMA CODA RDS file - this contains information from company data +# 3. The fractional polynomials NMA RDS file - this contains information from company data +# 4. Either the raw data file containing the pseudo-IPD for all trials for survival analysis (RWE and company data included); or +# 5. The RDS output from the survival analysis using both RWE and company data + +# You will need to manually select the inputs file relevant to your user type, this is not stored on Github as access to CIC information differs by user type + +# The first part of this code pulls all of the named ranges from the excel workbook, expand the parameters table + +#Option to define Excel path on local machine - comment in this and comment out the code below to select file +excel_path <- "./1_Data/ID6184_RCC_model inputs FAD version [ACIC redacted, cPAS redacted and CIC redacted].xlsm" +#i <- f_excel_extract(excel_path, verbose = TRUE) + +if (file.exists(excel_path)) { + i <- f_excel_extract(excel_path, verbose = TRUE) +} else { + i <- f_excel_extract(rstudioapi::selectFile( + caption = "Select the Excel inputs file (ID6184_RCC_model inputs....xlsm)", + label = "ID6184_RCC_model inputs....xlsm", + path = "./1_Data/", + filter = "Excel Files (*.xlsm)", + existing = TRUE + ), verbose = TRUE) +} + +i <- c(i,f_excel_cleanParams(i$R_table_param)) + + +# Set which decision problem to look at, initially functionality has been geared towards the decision problem for cabozantinib plus nivolumab +i$decision_problem <- "cabo+nivo" + +# We then create a place for identifiers. Adding in an object to i full of lookup tables makes automated translation +# possible even when one doesn't know the number of items ex ante, or how they combine. +# +# If the lookup table is correct one can translate id numbers to text strings which are +# consistent throughout the entire model. This is extremely useful as the model can +# be expanded to any number of treatments and potentially even any number of lines +# (up to a reasonable maximum) + +i$id <- list(ipd = list()) +i$lookup <- list(ipd = list()) + +# Add distribution names to i +# This model only includes standard parametric distributions as more complex distributions were not deemed to be required for the included treatments + +i$distnames <- + c( + gengamma = "gengamma", + exp = "exp", + weibull = "weibull", + lnorm = "lnorm", + gamma = "gamma", + gompertz = "gompertz", + llogis = "llogis" + ) + + + +# The next step is to then "tidy up" i into another object, p. p doesn't necessarily +# have to house everything, only things that will change in PSA + +p <- f_misc_param_generate_p(i) + +# Set seed for PSA - note this is done in the script to run the PSA, not here! +# set.seed(1475) + +# Max lines within the R model +p$basic$R_maxlines <- 4 + +# Pass this into p so that p can be used to exclusively compute the model: +p$basic$decision_problem <- i$decision_problem + +#### 3.2 Define sequences ########### + +#### This code produces a list of possible sequences per population based upon the rules defined for RCC +#### and the user input number of lines + + +# Add drug names to comparators vector extracted from inputs list. + +i$sequences <- f_generate_sequences( + comparators = i$List_comparators, + maxlines = p$basic$R_maxlines +) + +# restrict the pathways to those that are possible and permitted. +i$sequences <- as.data.frame(i$sequences) + +populations <- i$i_nr_populations + +seqs <- NULL +for (population in 1:populations) { + cat("Applying sequence restrictions to population", population,"\n") + + s <- f_path_tx_restrict( + sequences = i$sequences, + allowed = f_get_allowed_lists(i, population), #overall list of allowed drugs in this popn + L1 = f_get_L1_lists(i, population), # 1L drugs allowed in this popn + L2 = f_get_L2_lists(i, population), # 2L drugs allowed in this popn + L3 = f_get_L3_lists(i, population), # 3L drugs allowed in this popn + L4 = f_get_L4_lists(i, population), # 4L drugs allowed in this popn + only_after = f_get_only_after_lists(i, population), #list of restrictions where tx can be only after the listed txs + not_immediate_after = f_get_not_immediate_after_lists(i, population), #list of restrictions where tx can be only immediately before the listed txs + one_in_list = f_get_one_in_list_lists(i, population), #list of restrictions where only one of the tx in each list is allowed + only_after_one = f_get_only_after_one_lists(i, population), #list of restrictions where only one of the listed treatments is allowed prior to current therapy + L2_only_after = f_get_2L_only_after_lists(i, population), #list of 2L+ restrictions: if drug is used 2L, 3L or 4L, can only be after drug x + L2_only_immediate_after = f_get_2L_only_immediate_after_lists(i, population), #list of 2L+ restrictions: if drug is used 2L, 3L or 4L, can only be immediately after drug x + L2_only_one = f_get_2L_only_one_lists(i, population) #list of 2L+ drugs where only one of them allowed in a given sequence + ) + s <- cbind(rep(paste0("pop", population),nrow(s)), s) + colnames(s) <- paste0('V', seq_len(ncol(s))) # rbind no longer likes un-named columns so added this + seqs <- rbind(seqs, s) +} +rownames(seqs) <- NULL + +i$sequences <- seqs + +#### Uncomment this code to view the sequences and write the sequences defined to csv + +# i$sequences +# write.csv(seqs, "4_Output/sequences.csv", row.names = F) +rm(s, seqs, populations) + +# define number of cycles and a vector of the cycles + + +# 3.3. Survival analysis ------------------------------------------------------- + +# All objects here go in i$surv initially, and are then streamlined down to +# what's needed to run models in the transition from i to p. +# +# Some values of p are used during the below (primarily p$surv$distNames, which +# controls which distributions are included in the flexsurv runs) + + +# 3.3.1 Survival input structure ------------------------------------------ + +i$surv <- list() + +#### Read in survival data from Excel workbook + +# Pull out the raw data from the IPD excel book - one named range per treatment at each line +# Each reference curve is defined in Excel as time (weeks), event/censor (event coded as 1, censor as 0), patient group, line, molecule, trial and endpoint +# Pull all of the named ranges from the excel workbook, expand the parameters table + +excel_path2 <- "./1_Data/IPD_R_input_noACIC.xlsx" +if (file.exists(excel_path2)) { + wb <- f_excel_extract(excel_path2, verbose = TRUE) + i$surv$pld <- as.data.table(wb$`_xlnm._FilterDatabase`) + rm(wb) +} else { + wb <- f_excel_extract(rstudioapi::selectFile( + caption = "Select the IPD file (IPD_R_input_noACIC.xlsx)", + label = "IPD_R_input_noACIC.xlsx", + path = "./1_Data/", + filter = "Excel Files (*.xlsx)", + existing = TRUE + ), verbose = TRUE) + i$surv$pld <- as.data.table(wb$`_xlnm._FilterDatabase`) + +} + + +# Some small cleaning of the PLD. +i$surv$pld <- i$surv$pld[,list(population,line,molecule,trial,endpoint,timew,event_censor)] + +# Do not allow zero survival times, they have to be at least 1 day. the TUotA is +# weeks, so 1 day is 1/7 weeks: +i$surv$pld[timew ==0,"timew"] <- 1/7 + +# The named range r_pld has numeric identifiers for: +# +# - pop +# - line +# - mol (i.e., regimen - combination therapies are under the same number) +# - trial (trial id WITHIN population line and molecule to set them apart from each other - usually just 1!) +# - endpoint + +# These numeric identifiers are then used to create a nested list of survival regression models and +# extrapolations. The extrapolations are filtered down to the extrapolations that are selected +# within the excel input sheet, but the rest are kept here in i in case of scenario analysis. +# +# Note that the lookup tables in the next section are used to translate these numbers +# into human-readable identifiers. + +# 3.3.2 Data identification ------------------------------------------ + +# There is a lot of nesting involved in this part of the analysis, with population line, regimen trial and endpoint +# making a total of 5 layers of nesting to automatically go through each endpoint for each trial for +# each regimen for each line for each population, perform all regression analyses, produce parameters +# and have an easily identifiable (and therefore programmable) spaces for the results of each analysis +# which can then be spat out into reporting. + +# The first step is to break up r_pld into separate datasets depending on the identifiers. A function +# is used to do this which returns nothing if such data for one id set doesn't exist. +# +# Note that at this stage it is just those contexts which HAVE got PLD which are to be organised. +# For those endpoints and so on that do not have data, a separate step after this one to populate +# every endpoint for every treatment line for every treatment sequence is performed. + +i$id$ipd <- list( + pop = i$r_pld_lookup_pop$Number[!is.na(i$r_pld_lookup_pop$Number)], + line = i$r_pld_lookup_line$Number[!is.na(i$r_pld_lookup_line$Number)], + mol = i$r_pld_lookup_mol$Number[!is.na(i$r_pld_lookup_mol$Number)], + trial = i$r_pld_lookup_trial$Number[!is.na(i$r_pld_lookup_trial$Number)], + endpoint = i$r_pld_lookup_endpoint$Number[!is.na(i$r_pld_lookup_endpoint$Number)] +) + +names(i$id$ipd$pop) <- paste0("pop_" , i$id$ipd$pop) +names(i$id$ipd$line) <- paste0("line_" , i$id$ipd$line) +names(i$id$ipd$mol) <- paste0("mol_" , i$id$ipd$mol) +names(i$id$ipd$trial) <- paste0("trial_" , i$id$ipd$trial) +names(i$id$ipd$endpoint) <- paste0("endpoint_", i$id$ipd$endpoint) + + +# to see this, we have: +#i$id$ipd + +# Generating the same structure but with the translation table from number to +# text: + +i$lookup$ipd <- list( + pop = data.table(i$r_pld_lookup_pop)[Description != 0], + line = data.table(i$r_pld_lookup_line)[Description != 0], + mol = data.table(i$r_pld_lookup_mol)[Description != 0], + trial = data.table(i$r_pld_lookup_trial)[Description != 0], + endpoint = data.table(i$r_pld_lookup_endpoint)[Description != 0] +) + +# For treatment line, add a translator for the column in the sequences output: + +i$lookup$ipd$line$seq_col <- paste0("V",2:(nrow(i$lookup$ipd$line)+1)) +i$lookup$ipd$line$R_id <- paste0("line_",1:nrow(i$lookup$ipd$line)) + +i$lookup$dist <- i$r_pld_lookup_dist + + +# This means that you can easily look up things like so: + +# i$lookup$ipd$mol[Number == 1,list(Description,RCC_input_desc)] +# i$lookup$ipd$mol[Number == 2,list(Description,RCC_input_desc)] +# i$lookup$ipd$line[Number == 1,list(Description,RCC_input_desc)] +# i$lookup$ipd$pop[Number == 0,list(Description,RCC_input_desc)] + +# One can also do the opposite, translating input file descriptions into numbers: + +# i$lookup$ipd$mol[RCC_input_desc == "ipi_nivo",list(Description,Number)] + +i$lookup$trt <- i$lookup$ipd$mol$Number +names(i$lookup$trt) <- i$lookup$ipd$mol$RCC_input_desc +names(i$lookup$trt)[length(i$lookup$trt)] <- "BSC" + +# pass to p whenever i$lookup has been populated/updated. +p$basic$lookup <- i$lookup +p$basic$id <- i$id + +# one can then simply i$lookup$trt["nivolumab"] or i$lookup$trt["sorafenib"] to +# get the id numbers. + +# This then means that one can translate the treatment sequence data generated earlier +# into numerical versions in one go: + +# Start by making the id for population fit with the rest of the model (pop_ with pop +# starting from 0). NOTE that there is 1 more population in treatment sequences than +# in the rest of the model... + +i$seq_clean <- data.table(i$sequences) + +i$seq_clean$V1 <- paste0("pop_",as.numeric(substr(i$seq_clean$V1,4,4)) - 1) + +i$seq_pops <- unique(i$seq_clean$V1) +names(i$seq_pops) <- i$seq_pops + +# The "clean" version of sequences - first with words, then with numbers, then references + +i$seq_clean <- lapply(i$seq_pops, function(popu) { + tmp <- i$seq_clean[V1 == popu,-1] + colnames(tmp) <- i$lookup$ipd$line$R_id[1:(p$basic$R_maxlines + 1)] + tmp +}) + +# It's pretty nested this but simplifies upon explanation: lapply on a data.frame +# or data.table goes column-wise, so going across columns substitute the values +# for the values in i$lookup$trt which have corresponding names, returning the numbers +# which are consistent throughout the model. The way of looking inside e.g. network +# is e.g. pop_2$line_5$mol_2$endpoint_1, so now we can use the tables produced below +# to "order" the inputs for a treatment pathway +i$seq_n <- lapply(i$seq_clean, function(popu) { + as.data.table(lapply(popu, function(co) i$lookup$trt[co])) +}) +i$seq_ref <- lapply(i$seq_clean, function(popu) { + tmp <- as.data.table(lapply(popu, function(co) { + vals <- paste0("mol_",i$lookup$trt[co]) + ifelse(vals == "mol_NA",NA,vals) + })) +}) + + +# Now that we have the final sequence list, we can add them to p: + +p$seq$n <- i$seq_n +p$seq$ref <- i$seq_ref +p$seq$qc <- i$seq_clean + +# NOTE: QC check here is for NAs that are not beyond a 999 (i.e. past BSC) + +# We now have all the treatment sequences in the form of the molecule +# number and the consistent reference linking right back to the named range +# r_pld_lookup_mol in the excel front end. This ensures that the R model is +# consistent with the R model in terms of which drugs are feeding through +# to different places, as manually checking that is a very difficult and time +# consuming task. +# +# Long story short: +# +# - i$seq_clean: names of treatments per excel front end in order for all populations. use i$lookup$ipd$mol as reference table. +# - i$seq_n: corresponding treatment numbers per named range r_pld_lookup_mol in excel +# - i$seq_ref: reference name for pulling things out of R lists (e.g. p$drug[unlist(i$seq_ref$pop_0[1,])]) pulls pop 0 first sequence drug info IN ORDER :) +# +# This is automatically in line with the reference tables in the excel front end +# loaded at the time. If the ordering is changed there it needs updating in the IPD +# and in the lookup tables in the lists sheet of excel (and throughout excel!) +# +# +# +# i.e, if Excel lookup tables are wrong, this will be wrong!!! +# +# + + +# 3.3.3 TSD14 survival analysis ------------------------------------------ + +# Now that treatment sequences are brought in and cleaned up ready for use, we +# can perform the survival analysis. +# +# Use the function in Survival_functions.R to perform "simple" extrapolations +# on all pop line mol trial endpoint combinations with available data and return +# NULL for the rest + +# Let's perform some labelling like we did for treatment sequences for convenience/QC + +i$surv$lab_pld <- list() + +i$surv$lab_pld$population <- i$lookup$ipd$pop$Number +names(i$surv$lab_pld$population) <- i$lookup$ipd$pop$Description + +i$surv$lab_pld$line <- i$lookup$ipd$line$Number +names(i$surv$lab_pld$line) <- i$lookup$ipd$line$Description + +i$surv$lab_pld$molecule <- i$lookup$ipd$mol$Number +names(i$surv$lab_pld$molecule) <- i$lookup$ipd$mol$Description + +i$surv$lab_pld$trial <- i$lookup$ipd$trial$Number +names(i$surv$lab_pld$trial) <- i$lookup$ipd$trial$Description + +i$surv$lab_pld$endpoint <- i$lookup$ipd$endpoint$Number +names(i$surv$lab_pld$endpoint) <- i$lookup$ipd$endpoint$Description + + +# Now, put the data in a space and replace numbers with labels: + +i$surv$lab_pld$dat <- i$surv$pld +i$surv$lab_pld$dat$population <- names(i$surv$lab_pld$population)[match(i$surv$lab_pld$dat$population,i$surv$lab_pld$population)] +i$surv$lab_pld$dat$line <- names(i$surv$lab_pld$line)[match(i$surv$lab_pld$dat$line,i$surv$lab_pld$line)] +i$surv$lab_pld$dat$molecule <- names(i$surv$lab_pld$molecule)[match(i$surv$lab_pld$dat$molecule,i$surv$lab_pld$molecule)] +i$surv$lab_pld$dat$trial <- names(i$surv$lab_pld$trial)[match(i$surv$lab_pld$dat$trial,i$surv$lab_pld$trial)] +i$surv$lab_pld$dat$endpoint <- names(i$surv$lab_pld$endpoint)[match(i$surv$lab_pld$dat$endpoint,i$surv$lab_pld$endpoint)] + +# Now we have a labelled version which is a bit easier to QC. + +# Note to debug it is very helpful to set verbose to TRUE below so that you can identify +# the datasets which are problematic (e.g. not converging, 0 time values) + +i$surv$n_by_plmte <- i$surv$pld[, .N, by = list(population, line,molecule,trial,endpoint)] %>% + arrange(population,line, molecule,trial,endpoint) + +i$surv$n_by_plmte$population <- i$lookup$ipd$pop[match(i$surv$n_by_plmte$population ,i$lookup$ipd$pop$Number),Description] +i$surv$n_by_plmte$line <- i$lookup$ipd$line[match(i$surv$n_by_plmte$line ,i$lookup$ipd$line$Number),Description] +i$surv$n_by_plmte$molecule <- i$lookup$ipd$mol[match(i$surv$n_by_plmte$molecule ,i$lookup$ipd$mol$Number),Description] +i$surv$n_by_plmte$molecule[which(is.na(i$surv$n_by_plmte$molecule))] <- "Non-UK treatments (pooled)" +i$surv$n_by_plmte$trial <- i$lookup$ipd$trial[match(i$surv$n_by_plmte$trial ,i$lookup$ipd$trial$Number),Description] +i$surv$n_by_plmte$endpoint <- i$lookup$ipd$endpoint[match(i$surv$n_by_plmte$endpoint ,i$lookup$ipd$endpoint$Number),Description] + +# The number of rows in this table is the number of SETS of regression analyses +# that are going to be run (each is 7 regressions) + + +# The below code runs the survival analysis and saves as an RDS file for upload, this will only run if you set +# i$dd_run_surv_reg to "Yes" either in the Excel input or here + +if (i$dd_run_surv_reg == "Yes") { + + i$surv$reg <- f_surv_runAllTSD14( + r_pld = i$surv$pld, + id = i$id$ipd, + lookups = i$lookup$ipd, + draw_plots = FALSE, + distnames = i$distnames, + cl_y = p$basic$cl_y, + t_cyc = p$basic$t_cyc, + xlim_survplots_yr = p$misc$plot$xlim_survplots_yr, + t_yr = p$basic$t_yr, + verbose = qc_mode, + min_obs = 28 + ) + + # now, there is very little information available on BSC overall survival, + # for those people that decide they do not want further treatment + # + # The best data available currently is pooled PPS data on 4L patients, these + # are then 5L+ patients and given that there are currently 4 lines of therapy + # the proportion that receive something active subsequently is likely to be + # small. Consequently, this is likely a pooled analysis which can inform + # early (and 5L) BSC OVERALL SURVIVAL. + # + # Therefore the molecule 999 4th line PPS should be informed by a pooled analysis + # of all molecules' PPS at 4th line. That is, i$surv$pld[line == 4 & trial == 2 & endpoint == 4,] + # is the data that should inform endpoint 0 for all BSC. + + # MANUALLY RUN SURVIVAL FOR BSC PPS AS POOLED!!! + + i$surv$reg$pop_0$line_4$mol_999$trial_2$endpoint_4 <- lapply(1:1,function(x) { + + # Filter down to the parameters above associated with this combination: + ipd <- i$surv$pld[line==4 & endpoint==4,list(timew,event_censor)] + + names(ipd) <- c("t","e") + + cat(paste0( + "Survival analysis - population: ", i$lookup$ipd$pop[Number == 0, Description], + "\t line: " , i$lookup$ipd$line[Number == 4, Description], + "\t molecule: " , i$lookup$ipd$mol[Number == 999, Description], + "\t trial: " , i$lookup$ipd$trial[Number == 2, Description], + "\t endpoint: " , i$lookup$ipd$endpoint[Number == 4, Description], "\n" + )) + + fs_fits <- lapply(i$distnames, function(dist) { # applying all parametric survival curves in the list of distNames + fs_fit <- flexsurvreg( + formula = Surv(t, e) ~ 1, + data = ipd, + dist = dist + ) + return(list( + coefs = coefficients(fs_fit), # coefficients for the fitted model + vcov = vcov(fs_fit), # variance covariance matrix for the fitted model + fit = c(AIC= AIC(fs_fit), BIC=BIC(fs_fit), logLik = logLik(fs_fit)) # goodness of fit statistics for the fitted model + )) + }) + + gof <- do.call(rbind, lapply(i$distnames, function(dist) fs_fits[[dist]]$fit)) + + st <- matrix( + unlist(lapply(i$distnames, function(dist) { + f_extrapolate(p$basic$t_cyc, fs_fits[[dist]]$coefs, dist) + })), + ncol = length(i$distnames), + dimnames = list(NULL, i$distnames), + byrow = FALSE + ) + + + # curly braces on their own mean do this stuff and only return the last thing + # or what's in a return call + plot <- { + # First the IPD is produced in a format that survminer will accept. Data must all be + # the same format with the same column names. + # this assumes no covariate adjustment + + sm_ipd <- f_ce_km_MakeDatSurvFriendly( + Data_required = ipd, + time_column = "t", # note that this is taking IPD in weeks + event_column = "e", + t_multiplier = p$basic$cl_y # data in weeks, cycle length in plot years + ) + + # get the survival analysis in the form we need for survminer + # and make the extrapolations we need for survminer + + form <- Surv(t, ec) ~ 1 + sm_surv_est <- surv_fit(formula = form, data = sm_ipd) + + # make the plot with the input data: + survival_plot <- suppressMessages(f_extrap_plot( + SurvEstimate = sm_surv_est, + Data_required = sm_ipd, + curvefits_data = st, + time_vector = p$basic$t_yr, + xlim = p$misc$plot$xlim_survplots_yr, #### this will need replacing dependent on how many years we decide to show per time horizon + break_by = round(20/8,0) #### this will need replacing dependent on how many years we decide to show per time horizon + )) + list( + ipd = sm_ipd, + formula = form, + plot = survival_plot + ) + } + + + # Now that we've done everything for this dataset, return a list of the stuff + # we need for it: + return(list( + pop = i$lookup$ipd$pop[ Number == 0,Description], + line = i$lookup$ipd$line[ Number == 4,Description], + mol = i$lookup$ipd$mol[ Number == 999,Description], + tr = i$lookup$ipd$trial[ Number == 2,Description], + endpoint = i$lookup$ipd$endpoint[Number == 4,Description], + ipd = ipd, + fs_fits = fs_fits, + gof = gof, + st = st, + plot = plot + )) + })[[1]] + + saveRDS(i$surv$reg, file = "./1_Data/Survival_analysis.rds") + +} + +# to load in pre-run survival analysis select the RDS file here + +# option to load from pre-specified file path on local machine, uncomment this and comment out the line below to use + +RDS_path <- "./1_Data/Survival_analysis_noTTDorTTPorPPS[NoACIC].rds" +if (file.exists(RDS_path)) { + i$surv$reg <- readRDS(RDS_path) +} else { + i$surv$reg <- readRDS(rstudioapi::selectFile( + caption = "Please select 'Survival_analysis_noTTDorTTPorPPS[NoACIC].rds'", + label = "Survival_analysis_noTTDorTTPorPPS[NoACIC].rds", + path = "./1_Data/", + filter = "R Files (*.rds)", + existing = TRUE + )) +} + + +# Limit to model time horizon + +TH <- p$basic$th + 1 + +i$surv$reg <-lapply(i$surv$reg, function(popu) { + lapply(popu, function(li) { + lapply(li, function(mol) { + lapply(mol, function(tr) { + lapply(tr, function(endp) { + if (is.null(endp$st)) { + return(endp) + } else { + endp$st <- endp$st[1:TH,] + return(endp) + } + }) + }) + }) + }) +}) + + +# !!!!!! +# !!!!!! +# !!!!!! +# !!!!!! +# !!!!!! +# Note: i$surv$reg$pop_0$line_4$mol_999$trial_2$endpoint_4 is used +# to inform ALL BSC OS. This will be decided in the EXCEL FILE, which +# dropdowns for BSC OS should link to 4L PPS for mol 999 +# !!!!!! +# !!!!!! +# !!!!!! +# !!!!!! +# !!!!!! + + + +# Note that draw_plots will be a switch in the shiny application. +# In this case we draw plots because we need those plots later (for word output +# assisting with model selection) + + +# So that's all of the TSD14 survival analysis done. The next step is to programmatically +# proliferate comparative efficacy + + + +# On a tablet with very little computational power this takes a couple of minutes to run. on a new +# laptop its not long at all + + +# So, to pull out the visual fit of the analysis of TTD for a population + +# i$surv$reg$pop_0$line_1$mol_7$trial_0$endpoint_0$plot$plot +# i$surv$reg$pop_0$line_1$mol_1$trial_0$endpoint_3$plot$plot +# i$surv$reg$pop_0$line_1$mol_7$trial_0$endpoint_1$plot$plot + +# More importantly, to pull a particular extrapolation: +# i$surv$reg$pop_0$line_1$mol_1$trial_0$endpoint_1$st[,"weibull"] + +# Where the "weibull" part would come from a dropdown list in the Excel front-end of the model specific +# to that endpoint for that treatment for that line for that population (i.e. a lot of selections need to be made!) + + +# The stuff inside of i$surv$reg can be used to automatically populate a report presenting the full plot, regression summaries, +# goodness-of-fit results and the fit of the selected (via excel) distribution. The output can then be manually appended to include +# written justification for the selection(s) to drastically reduce the overhead associated with reporting survival analysis +# results and decisions made. + + +# 3.3.4 Survival analysis reporting --------------------------------------- + +# the next step is to go through all of the results based directly on survival data +# and produce a readout containing: +# +# - Regression summary tables +# - goodness of fit +# - extrapolations (short and long-term) for visual fit assessment +# +# Each of these should have a separate section which at least states the identifiers +# (i.e., translating from numbers to text as in Section 3.4.2 above) +# +# The best way to do this is with either Reduce or base for loops: +# + +# Produce all the KM, extrapolations and gof tables for decisions on the front-end + +# Note whether or not the survival analysis report is run by the code is set in Excel as this takes a long time to produce +# This cannot be produced without access to PLD + +if (i$dd_report_req_surv_reg=="Yes") { + + doc_surv <- f_surv_makeTSD14Report( + fs_res = i$surv$reg, + id = i$id$ipd, + lookup = i$lookup$ipd + ) + print(doc_surv, target = "./4_Output/Survival_Analysis.docx") + + rm(doc_surv) +} + + +# 3.3.5 Comparative efficacy propagation (NMA) --------------------------------------------------------------- + +# Pull in the data and calculate means by pop line mol endpoint reftrt and reftrial + +# First read in RDS file containing the PH NMA coda samples + + +# 3.3.5.1.1 PH NMA data ----------------------------------------------------- + +# Option to read in PH NMA CODA from local machine, uncomment this and comment out the line below to use +RDS_path2 <- "./1_Data/PH_NMA_CODA.rds" +if (file.exists(RDS_path2)) { + i$PHNMA <- readRDS(RDS_path2) +} else { + i$PHNMA <- readRDS(rstudioapi::selectFile( + caption = "Please select 'PH_NMA_CODA.rds'", + label = "PH_NMA_CODA.rds", + path = "./1_Data/", + filter = "R Files (*.rds)", + existing = TRUE + )) +} + + +colnames(i$PHNMA$data) <- c("Run", "Population", "Line", "Molecule", "Endpoint", "Reference.treatment", "Reference.trial", "HR") +i$PHNMA$data$Reference.endpoint <- i$PHNMA$data$Endpoint + + +# IMPORTANT: 3L relative effectiveness is assumed the same as 2L!!!! +# IMPORTANT: 3L relative effectiveness is assumed the same as 2L!!!! +# IMPORTANT: 3L relative effectiveness is assumed the same as 2L!!!! + +i$PHNMA$assume3L <- i$PHNMA$data[Line==2,] +i$PHNMA$assume3L$Line <- 3 +i$PHNMA$data <- rbind(i$PHNMA$data,i$PHNMA$assume3L) + +i$PHNMA$assumeTTD <- i$PHNMA$data[Endpoint==1,] +i$PHNMA$assumeTTD$Endpoint <- 2 +i$PHNMA$data <- rbind(i$PHNMA$data,i$PHNMA$assumeTTD) + +i$PHNMA$assumeTTP <- i$PHNMA$data[Endpoint==1,] +i$PHNMA$assumeTTP$Endpoint <- 3 +i$PHNMA$data <- rbind(i$PHNMA$data,i$PHNMA$assumeTTP) + +# IMPORTANT: 3L relative effectiveness is assumed the same as 2L!!!! +# IMPORTANT: 3L relative effectiveness is assumed the same as 2L!!!! +# IMPORTANT: 3L relative effectiveness is assumed the same as 2L!!!! + +# Calculate the mean from the CODA samples for deterministic analysis + +i$PHNMA$means <- i$PHNMA$data[,.(HR = mean(HR)),by=list(Population,Line,Molecule,Endpoint,Reference.treatment,Reference.trial)] + + +# 3.3.5.1.2 DETERMINISTIC CODA -------------------------------------------- + +# for the deterministic analysis we use the means. +p$releff$CODA$PH <- i$PHNMA$means + + +# 3.3.5.2.1 FP NMA data ----------------------------------------------------- + +# Load in FP NMA data +i$FPNMA <- list() + +#read in means for deterministic and PSA parameters for probabilistic + +# option to read in from local machine, uncomment the below and comment out line 949 to use +RDS_path3 <- "./1_Data/FPNMA_means.rds" +if (file.exists(RDS_path3)) { + i$FPNMA$means <- readRDS(RDS_path3) +} else { + i$FPNMA$means <- readRDS(rstudioapi::selectFile( + caption = "Load in FP NMA CODA (FPNMA_means.rds)", + label = "FPNMA_means.rds", + path = "./1_Data/", + filter = "R Files (*.rds)", + existing = TRUE + )) +} + + +#tidy means column names and timing +colnames(i$FPNMA$means)[colnames(i$FPNMA$means) == "intervention_code"] <- "Molecule" +colnames(i$FPNMA$means)[colnames(i$FPNMA$means) == "reference_treatment_code"] <- "Reference.treatment" +colnames(i$FPNMA$means)[colnames(i$FPNMA$means) == "ref_trial_code"] <- "Reference.trial" +colnames(i$FPNMA$means)[colnames(i$FPNMA$means) == "population"] <- "Population" +colnames(i$FPNMA$means)[colnames(i$FPNMA$means) == "line"] <- "Line" +colnames(i$FPNMA$means)[colnames(i$FPNMA$means) == "endpoint"] <- "Endpoint" +colnames(i$FPNMA$means)[colnames(i$FPNMA$means) == "V1"] <- "HR" + +i$FPNMA$means$time <- round(i$FPNMA$means$time * 52 / 12) + +# means + +# Rebasing to allow use of cabo as reference treatment in 2nd line +# repeats for means (stored in i which are later transferred to p) +i$FPNMA$means <- f_rebase_for_cabo_as_ref_in_2L(FPNMAdata = i$FPNMA$means) + +# Remove the now redundant objects we made in order to do this + +# IMPORTANT: 3L relative effectiveness is assumed the same as 2L!!!! +# IMPORTANT: 3L relative effectiveness is assumed the same as 2L!!!! +# IMPORTANT: 3L relative effectiveness is assumed the same as 2L!!!! +i$FPNMA$means <- f_3L_rel_effect_same_as_2L(FPNMAdata = i$FPNMA$means) + +# IMPORTANT: 3L relative effectiveness is assumed the same as 2L!!!! +# IMPORTANT: 3L relative effectiveness is assumed the same as 2L!!!! +# IMPORTANT: 3L relative effectiveness is assumed the same as 2L!!!! + +# Create 1 row for each destination PLMTE, so that we know where to put the +# fp data without having to iterate much +i$FPNMA$destinations <- f_gen_destinations(fp_data = i$FPNMA$means) + +# add in reference.trial 2 +i$FPNMA$means <- f_add_reference_trial_2(fp_data = i$FPNMA$means) + + +# 3.3.5.2.2 DETERMINISTIC CODA -------------------------------------------- + +p$releff$CODA$FP <- i$FPNMA$means +p$releff$fp_dest <- i$FPNMA$destinations[!is.na(Molecule), ] + +# limit to time horizon +p$releff$CODA$FP <- p$releff$CODA$FP[time <= p$basic$th, ] + +#eliminate NAs in molecule +p$releff$fp_dest <- i$FPNMA$destinations[!is.na(Molecule), ] +# A note on i vs p --------------------------------------------------------------- + +# P is for the parameters for one model scenario. the relative efficacy network is required +# in order to compute the s(t) for all the different PLMTEs we need to power the model with. +# Therefore, the samples which are used should go into p not i. +# +# However, the full CODA samples are a different matter as particularly for the +# FPNMA these are large files and there's no need to copy paste this many times. +# +# Instead when we get to the point of the releff network, THEN we can be putting +# it into p. this is because for a particular probabilistic iteration, +# scenario and so on we can pull through the right HRs to the right place! +# + +# 3.3.5.3 Empty relative efficacy network --------------------------------- + +# Turn this into a list structure using the same naming convention as the rest of the model: +# +# +# For this population, population line molecule trial and endpoint, generate a list +# of spaces containing information on the relationship between other +# population line molecule trial and endpoint pairings and this one. +# +# For example, we need to be able to do the following: +# +# - HR applied to other subgroup for same line mol tr endpoint +# - HR applied to same subgroup for different line same mol tr endpoint +# - HR applied to same subgroup same line different mol different tr same endpoint +# - HR applied to same subgroup same line different mol same tr same endpoint +# - HR applied to same subgroup same line different mol tr endpoint +# +# The best way to cope with all this is to basically list out where +# the extrapolation is coming from USING THE SAME NAMES AS IN i$surv$reg +# but with the addition of the selected distribution +# +# So, we have a list with dest for destination (i.e. this extrapolation) +# origin (where it's coming from), and hr (what to apply to it) +# +# The next step (a different function) populates orig +# + + +p$releff$network <- f_NMA_generateNetwork(i$id$ipd,i$lookup$ipd) + +# To visualize it a bit, the structure looks like tree roots. Like this: +# 1 Root +# 2 ¦--pop_0 +# 3 ¦ ¦--line_1 +# 4 ¦ ¦ ¦--mol_0 +# 5 ¦ ¦ ¦ ¦--trial_0 +# 6 ¦ ¦ ¦ ¦ ¦--endpoint_0 +# 7 ¦ ¦ ¦ ¦ ¦ ¦--dest +# 8 ¦ ¦ ¦ ¦ ¦ °--orig +# 9 ¦ ¦ ¦ ¦ ¦--endpoint_1 +# 10 ¦ ¦ ¦ ¦ ¦ ¦--dest +# 11 ¦ ¦ ¦ ¦ ¦ °--orig +# 12 ¦ ¦ ¦ ¦ ¦--endpoint_2 +# 13 ¦ ¦ ¦ ¦ ¦ ¦--dest +# 14 ¦ ¦ ¦ ¦ ¦ °--orig +# 15 ¦ ¦ ¦ ¦ ¦--endpoint_3 +# 16 ¦ ¦ ¦ ¦ ¦ ¦--dest +# 17 ¦ ¦ ¦ ¦ ¦ °--orig +# 18 ¦ ¦ ¦ ¦ ¦--endpoint_4 +# 19 ¦ ¦ ¦ ¦ ¦ ¦--dest +# 20 ¦ ¦ ¦ ¦ ¦ °--orig +# 21 ¦ ¦ ¦ ¦ ¦--endpoint_5 +# 22 ¦ ¦ ¦ ¦ ¦ ¦--dest +# 23 ¦ ¦ ¦ ¦ ¦ °--orig +# 24 ¦ ¦ ¦ ¦ ¦--endpoint_6 +# 25 ¦ ¦ ¦ ¦ ¦ ¦--dest +# 26 ¦ ¦ ¦ ¦ ¦ °--orig +# 27 ¦ ¦ ¦ ¦ °--endpoint_7 +# 28 ¦ ¦ ¦ ¦ ¦--dest +# 29 ¦ ¦ ¦ ¦ °--orig +# 80 ¦ ¦ ¦--mol_1 +# 81 ¦ ¦ ¦ ¦--trial_0 +# 82 ¦ ¦ ¦ ¦ ¦--endpoint_0 +# 83 ¦ ¦ ¦ ¦ ¦ ¦--dest +# 84 ¦ ¦ ¦ ¦ ¦ °--orig +# 85 ¦ ¦ ¦ ¦ ¦--endpoint_1 +# 86 ¦ ¦ ¦ ¦ ¦ ¦--dest +# 87 ¦ ¦ ¦ ¦ ¦ °--orig +# 88 ¦ ¦ ¦ ¦ ¦--endpoint_2 +# 89 ¦ ¦ ¦ ¦ ¦ ¦--dest +# 90 ¦ ¦ ¦ ¦ ¦ °--orig +# 91 ¦ ¦ ¦ ¦ ¦--endpoint_3 +# 92 ¦ ¦ ¦ ¦ ¦ ¦--dest +# 93 ¦ ¦ ¦ ¦ ¦ °--orig +# 94 ¦ ¦ ¦ ¦ ¦--endpoint_4 +# 95 ¦ ¦ ¦ ¦ ¦ ¦--dest +# 96 ¦ ¦ ¦ ¦ ¦ °--orig +# 97 ¦ ¦ ¦ ¦ ¦--endpoint_5 +# 98 ¦ ¦ ¦ ¦ ¦ ¦--dest +# 99 ¦ ¦ ¦ ¦ ¦ °--orig +# 100 ¦ ¦ ¦ ¦ °--... 2 nodes w/ 4 sub +# 101 ¦ ¦ ¦ °--... 2 nodes w/ 54 sub +# 102 ¦ ¦ °--... 12 nodes w/ 956 sub +# 103 ¦ °--... 5 nodes w/ 6288 sub +# 104 °--... 3 nodes w/ 25463 sub + +# A function (or functions) is (are) required to do several things, IN THIS ORDER: +# +# 1. Put the HRs from the PH NMA in the destinations, using the CODA sample identifiers to set the origin. +# 2. Put the time-varying HRs from the FP NMA in the destinations, using the identifiers to set the origins +# 3. Use the table R_table_eff_data_settings from Excel to apply any superseding & any assumption/ad-hoc stuff (HRs, assume equal to and so on) +# 4. Use the final network object to propagate relative efficacy throughout the network, producing a set of extrapolations with RE applied. +# + +# Generate DESTINATION trial number column if it doesn't already exist: +if(!"Trial" %in% colnames(i$R_table_eff_data_settings)) { + i$R_table_eff_data_settings$Trial <- i$lookup$ipd$trial$Number[match(i$R_table_eff_data_settings$Trial.name.if.effectiveness.source.is.trial,i$lookup$ipd$trial$Description)] +} + + + +# 3.3.5.4 Linking inputs from PH and FP NMAs -------------------------------------------- + + +# Use the information we have from the PHNMA CODA sample to populate the corresponding +# places within p$releff$network +# +p$releff$network <- f_NMA_linkPHNMA( + network = p$releff$network, + hr_table = p$releff$CODA$PH +) + +# All treatments included in the network: +# unique(c(unique(p$releff$means$Molecule),unique(p$releff$means$Reference.treatment),unique(i$R_table_eff_data_settings$Origin.treatment),unique(i$R_table_eff_data_settings$Treatment))) + +# Link in the fractional polynomial point estimate time-varying hazard ratios +p$releff$network <- f_NMA_linkFPNMA( + network = p$releff$network, + destinations = p$releff$fp_dest, + hr_table = p$releff$CODA$FP, + time_horizon = p$basic$th +) + +# See e.g. +# p$releff$network$pop_0$line_1$mol_1$trial_0$endpoint_0 +# +# To see that the time-varying hazard has been passed along :) + + +# Remember that we only need to apply this one to the stuff that is NOT direct +# survival analysis applied to the data. HOWEVER, we need those rows in the table +# as well as we need the "origin" distributional selections to fill in our output! + + + +# 3.3.5.5 Assumptions from Excel ------------------------------------------ + +# Apply the setting dd_use_PHnma_for_FPnma from excel. This supersedes what's selected +# in efficacy settings, changing all FP NMA entries to PH NMA to force the model to use +# the PH NMA CODA sample over and above the FP NMA! + +if (i$dd_use_PHnma_for_FPnma == "Yes") { + i$which_fpnma <- which(i$R_table_eff_data_settings$Effectiveness.data.source == "FP_NMA") + i$R_table_eff_data_settings$Effectiveness.data.source[i$which_fpnma] <- "PH_NMA" +} + +# QC point: to test whether all entries to Effectiveness.data.source are in the +# lookup list, compare these two: +# +# table(i$R_table_eff_data_settings$Effectiveness.data.source) +# +# i$List_eff_datasources +# +# i.e., +# +# setdiff(names(table(i$R_table_eff_data_settings$Effectiveness.data.source)),i$List_eff_datasources) +# +# "0" is fine as that's empty cells, but there should be nothing else. +# + +# setdiff(names(table(i$R_table_eff_data_settings$Effectiveness.data.source)),i$List_eff_datasources) + + +p$releff$network <- f_NMA_AddAssumptionsToNetwork( + network = p$releff$network, + phnma_table = p$releff$CODA$PH, + fpnma_table = p$releff$CODA$FP, + fpnma_destinations = p$releff$fp_dest, + excel_table = data.table(i$R_table_eff_data_settings), + trial_flag = i$List_eff_datasources[1], + fpnma_flag = i$List_eff_datasources[3], + phnma_flag = i$List_eff_datasources[2], + et_flag = i$List_eff_datasources[4], + ahr_flag = i$List_eff_datasources[5], + verbose = qc_mode +) + + +# Example to see all the HRs for population 0 in some nice tables +# +# f_releff_extract_all_HRs(p$releff$network) +# +# assign to an object to browse through it + + +# 3.3.5.6 Propagating the network ----------------------------------------- + +# Now that we have collected together all of the survival extrapolations for all +# of the data we have, and all of the HRs, whether by assumption or derived +# via NMA, plus all of the underlying distributional selections, we can proceed +# to "proliferate" the evidence network, starting from the extrapolations we have already +# and working "outwards" from that point, arriving at ONLY the extrapolation for each +# possibility (rather than all distributions). From there, we can "cherry-pick" +# those population line molecule trial endpoint distribution HR application choices +# to "build-up" a given treatment pathway. + + +# These are still "raw" extraps, so should be in i as they're not used for +# computation of pf. + +i$surv$extraps <- f_surv_getExtrapolations(regs = i$surv$reg) + +# propagate the extrapolation and comparative efficacy data/assumptions: +# +# Because this is for a particular iteration of the model, it should go into object p +# which is the input set for running one model. +# +# +p$surv$st <- f_releff_PropNetwork( + network = p$releff$network, + extraps = i$surv$extraps, + dos = 10, + verbose = qc_mode, + dist_lookups = p$basic$lookup$dist, + excel_table = data.table(i$R_table_eff_data_settings) +) + +# active trial depends on excel settings so it's hard to anticipate. 2 for all +# lines if rwe and + +# # pop 0 first line nivo mono trial 0 +# f_qc_surv_gethead(p$surv$st,p = 0,l = 1,m = 1,t = 2,len = 3) +# +# # pop 0 line 2 nivo mono trial 1 +# f_qc_surv_gethead(p$surv$st,p = 0,l = 2,m = 0,t = 2,len = 3) +# +# # follow pazo through the lines - so close! only OS in 4L but the others are all working +# # (just show me the first couple of rows from each for brevity). mol 5 selected +# # because it runs through all treatment lines +# f_qc_surv_gethead(p$surv$st,p = 0,l = 1,m = 5,t = 0,len = 3) +# f_qc_surv_gethead(p$surv$st,p = 0,l = 2,m = 5,t = 1,len = 3) +# f_qc_surv_gethead(p$surv$st,p = 0,l = 3,m = 5,t = 1,len = 3) +# f_qc_surv_gethead(p$surv$st,p = 0,l = 4,m = 5,t = 1,len = 3) +# +# # Risk pop 1 first line nivo cabo from trial and then rwe: +# f_qc_surv_gethead(p$surv$st,1,1,1,2) + + +# 3.3.6 Extrapolation modifications --------------------------------------- + +# This subsection focuses on applying the required modifications to the "raw" extrapolations +# now that we have propagated comparative efficacy. We need to first apply any adjustments for treatment effect waning +# and then check the following: +# +# - mortality never falls below that of the general population +# - The extrapolations which are NOT censored for death do not cross (in abs or hazard) OS + +# During technical engagement functionality will be added here to adjust for the impact of prior adjuvant treatment as part of scenario analysis + +if(i$dd_adjforprioradjuvant == "Yes") { + p$surv$st <- f_surv_adjuvant_HR( + st = p$surv$st, + adjuvant_impact = i$R_table_prior_IO_impact_eff, + demo_table = p$demo$table, + lookup = p$basic$lookup, + verbose = TRUE) +} + + + +# Curve crossing should be fixed on the treatment sequence level, NOT the original PLMT level +# through E. This is because doing it that way round could lead to implausible +# extrapolations. For instance, if the PFS from somewhere else was being injected +# in (e.g. if trial 0 for OS and trial 1 from a different PLMT for PFS via equivalence +# assumption), it may be the case that the assumed equal PFS crosses OS. +# +# Therefore, curve crossing fixes should be performed on the individual treatment +# pathways. +# +# In conclusion, this section ONLY adjusts for gpop mortality, and curve crossing +# adjustment is performed AFTER f_seq_extrapCollector is used to pull through +# the correct extrapolations to the correct DESTINATION locations. +# +# The function is ONLY applied to OS lines. +# +# Because we have pulled through the metatadata to our PLMTEs s(t) objects, +# we can use the "dest" element to decide which row of Excel range R_table_ptchar +# to apply in the general population mortality +# + +# Firstly, treatment effect waning. This is done on unadjusted curves and is +# not included in the model base-case. +# +# Make the information on patient characteristics from Excel into a data table + +i$R_table_ptchar <- data.table(i$R_table_ptchar) + + + + + +# BEFORE adjusting for general population, we must apply treatment effect waning. +# Evidence has suggested that this predominantly affects 1L treatments, +# and the table "R_table_TE_waning_settings" has been populated to capture +# waning. This provides information on the "destination" and the nature +# of the treatment effect waning. The "destination" information is then +# used to link to the efficacy settings table "R_table_eff_data_settings" +# in excel. This table contains the corresponding information on the "origin". +# In the case of 1L treatments, this is the reference curve + +##### Treatment effect waning application is defined in the main Excel workbook for input +##### For each treatment and outcome: +##### apply: yes / no +##### method: either absolute survival or hazards +##### when TE waning starts (years) +##### when TE waning is fully implemented (years) +##### linear graduation is used between the start and end time + +# Note this function produces a warning which details when there is a danger of TE waning in the usual fashion (setting hazards the same) +# being implemented producing counterintuitive results. In this case we use the higher of the hazards to prevent this problem + +if (sum(i$R_table_TE_waning_settings$apply.waning == "Yes") > 0) { + p$surv$st <- f_surv_twaning_apply( + st_list = p$surv$st, + tab_waning = data.table(i$R_table_TE_waning_settings), + tab_eff_set = data.table(i$R_table_eff_data_settings), + verbose = qc_mode + ) +} + +# Generate all the gpop lines we can generate: +p$surv$gpop <- if (i$dd_age_sex_source == "Mean") f_surv_GenOSLines_det( + R_table_ptchar = i$R_table_ptchar, + R_table_mort_lifeTable = i$R_table_mort_lifeTable, + t_yr = p$basic$t_yr, + lookups = i$lookup +) else f_surv_GenOSLines_ipd( + R_table_patientagesex = i$R_table_patientagesex, + R_table_mort_lifeTable = i$R_table_mort_lifeTable, + t_yr = p$basic$t_yr, + lookups = i$lookup +) + + +# During QC model we produce a comparison between mean-based and IPD based general population OS lines: +if (qc_mode) { + i$gpop <- list( + means = f_surv_GenOSLines_det( + R_table_ptchar = i$R_table_ptchar, + R_table_mort_lifeTable = i$R_table_mort_lifeTable, + t_yr = p$basic$t_yr, + lookups = i$lookup + ), + ipd = f_surv_GenOSLines_ipd( + R_table_patientagesex = i$R_table_patientagesex, + R_table_mort_lifeTable = i$R_table_mort_lifeTable, + t_yr = p$basic$t_yr, + lookups = i$lookup + ) + ) + + # Compare all risk population first-line across the 2 methods + + i$gpop$plotdat <- data.table( + t = rep(p$basic$t_yr,2), + os = c(i$gpop$means$pop_0$line_1$os,i$gpop$ipd$pop_0$line_1$os), + method = c(rep("Means",p$basic$th+1),rep("Patient data",p$basic$th+1)) + ) + + i$gpop$comp_plot <- ggplot(i$gpop$plotdat, aes(x = t, y = os, colour = method)) + + geom_line() + + theme_classic() + + theme(legend.position = "bottom", legend.title=element_blank()) + + labs(title = NULL, x = "Time (years)", y = "% Survival") + + scale_x_continuous(expand = expansion(mult = c(0,0.05))) + + scale_y_continuous(labels = scales::percent) + + if(qc_mode) { + ggsave( + filename = file.path("./4_Output/","gpop_1L_method_comparison.png"), + plot = i$gpop$comp_plot, + device = "png", + units = "cm", + width = 15 + ) + } +} + +# adjust all OS lines in p$surv$st for gpop mortality +p$surv$st <- f_surv_gpopadjust(st = p$surv$st, + gpop = p$surv$gpop, + method = "hazardmax", + verbose = qc_mode) + +# Adjust PFS and TTD for OS - PFS and TTD cannot exceed OS +p$surv$st <- f_surv_PFSxOS(p$surv$st, if(i$dd_adj_cross_curves == "Use hazards"){"hazardmax"} else{"abs"}) + +p$surv$st <- f_surv_TTDxOS(p$surv$st, if(i$dd_adj_cross_curves == "Use hazards"){"hazardmax"} else{"abs"}) + +# Adjust TTP, PFS cannot go above TTP, this is done on absolute survival rather than allowing flexibility to look at hazards +p$surv$st <- f_surv_PFSxTTP(st = p$surv$st,method = "abs") + +# The below should produce a string of positive or 0s +# p$surv$st$pop_0$line_1$mol_7$trial_2$endpoint_3$st - p$surv$st$pop_0$line_1$mol_7$trial_2$endpoint_1$st + +# Last assumption - 5L =4L. this moves over BSC when it comes after 4 active treatments. + +p$surv$st <- lapply(p$surv$st, function(popu) { + popu$line_5 <- popu$line_4 + return(popu) +}) + + +# 3.3.7 Visual QC of final curves --------------------------------------- + +# Here's an example of how to run the QC plots. These serve as a useful check +# later on too, because it reveals where some PLMTEs have not been populated. + +# f_qc_surv_ExtrapPlot( +# st = p$surv$st, +# popu = "pop_0", +# li = "line_1", +# mo = "mol_1", +# tr = "trial_2", +# t_yr = p$basic$t_yr, +# th = p$basic$th +# ) +# +# f_qc_surv_EstHazPlot( +# st = p$surv$st, +# gpop = p$surv$gpop, +# popu = "pop_0", +# li = "line_1", +# mo = "mol_1", +# tr = "trial_2", +# t_yr = p$basic$t_yr, +# th = p$basic$th +# ) + + +# Here is a QC method for visually having a look at the extrapolated survival +# after all relative efficacy has been applied. This creates a LOT of graphs. You can look through them by pressing the arrow on the plots window +if (qc_mode) { + i$excel_destinations <- data.table(i$R_table_eff_data_settings)[Include.in.this.analysis.=="Yes",list(Population,Treatment.line,Molecule,Origin.trial,End.point)] + i$surv$ExtrapSenseCheck <- Reduce( + x = 1:nrow(i$excel_destinations), + init = f_NMA_generateNetwork(i$id$ipd,i$lookup$ipd), + accumulate = FALSE, + f = function(prev, dest_row) { + dat <- as.list(i$excel_destinations[dest_row,]) + + d <- list( + pop = paste0("pop_",dat$Population), + line = paste0("line_",dat$Treatment.line), + mol = paste0("mol_",dat$Molecule), + trial = paste0("trial_",dat$Origin.trial), + endpoint = paste0("endpoint_",dat$End.point) + ) + + cat(paste0( + "Drawing plots: ", + " row ", dest_row, " i$surv$ExtrapSenseCheck$", + d$pop, "$", + d$line, "$", + d$mol, "$", + d$trial, "$plots", + "\n" + )) + + p_extrap <- f_qc_surv_ExtrapPlot( + st = p$surv$st, + popu = d$pop, + li = d$line, + mo = d$mol, + tr = d$trial, + t_yr = p$basic$t_yr, + th = p$basic$th + ) + + p_haz <- f_qc_surv_EstHazPlot( + st = p$surv$st, + gpop = p$surv$gpop, + popu = d$pop, + li = d$line, + mo = d$mol, + tr = d$trial, + t_yr = p$basic$t_yr, + th = p$basic$th + ) + + plmt <- prev[[d$pop]][[d$line]][[d$mol]][[d$trial]] + plmt$plots <- list( + p_extrap = p_extrap, + p_haz = p_haz + ) + + # Save 2 plots for each PLM available. Takes a while to run. + if (qc_mode) { + ggsave( + filename = file.path("./4_Output",paste0("p_extrap_",paste(d[1:4],collapse="_"),".png")), + plot = plmt$plots$p_extrap, + device = "png", + units = "cm", + width = 15 + ) + ggsave( + filename = file.path("./4_Output",paste0("p_tp_",paste(d[1:4],collapse="_"),".png")), + plot = plmt$plots$p_haz, + device = "png", + units = "cm", + width = 15 + ) + } + + prev[[d$pop]][[d$line]][[d$mol]][[d$trial]] <- plmt + + return(prev) + + } + ) +} + + + + +# 3.4 Preparation of p ------------------------------------- + +# !!!!!!!!!!!!!!!!!!!IMPORTANT, PLEASE READ!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +# +# Before the patient flow sheet can be computed, it is imperative to populate +# all of the elements of p which are required to compute it, such that the +# function f_pf_computePF() really only needs one argument, p, to run (with a +# few extra arguments for verbosity and limiting which populations to run etc). +# +# This is true of all analyses, including PSA. In the PSA p_psa will contain +# the components of p which require random number generation (RNG), whilst p +# itself will be used for all the stuff that does not change. +# +# Therefore, this section populates the "rest" of the model inputs (i.e. those +# that are not informing the sequences or disease models). This includes costs, +# QALYs and AEs. +# +# !!!!!!!!!!!!!!!!!!!IMPORTANT, PLEASE READ!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +# + +# Passing along an input for the ps model in case it is needed. this assumes +# prop of pre-progression deaths. +p$surv$prop_deathinPFS <- i$dd_prop_deathinPFS + + +# 3.4.1 Demographics ------------------------------------------------------ + +# Demographics are simply processed from the tables in Excel. + +p$demo$agg <- f_cleaning_ptchar(i$R_table_ptchar, i$lookup) + +# Deterministic version is very easy. +p$demo$live <- p$demo$agg + +# 3.4.2 QALYs ------------------------------------------------------------- + +# Utilities are applied to the disease model by treatment by line and whether the patient is on or off treatment +# Age adjustment is conducted multiplicatively in line with DSU guidance using earlier defined patient characteristics for age and sex + +# Extracting from excel file + +p <- add_population_utility_params(p, psa = FALSE, .i = i) + +# Pre-calculate the population utility norms since they will be the same across +# all sequences (though may vary across populations), and store in p + +base_utility <- data.frame(cycle = 0:p$basic$th, utility = 1) +if (i$dd_ageadjuutilities == "Yes") { + if (i$dd_age_sex_source == "Mean") { + # We find the row corresponding to line 1 for each relevant population + + # Do a lot of wrangling to get in the format we want... + ptc_L1 <- i$R_table_ptchar[Treatment.line == 1, c(1, 3, 4)] + colnames(ptc_L1) <- c("Population", "age", "sex") + ptc_L1$sex <- 1 - ptc_L1$sex + ptc_L1 <- merge(ptc_L1, i$lookup$ipd$pop, by.x = "Population", by.y = "Description") + ptc_L1 <- ptc_L1[order(ptc_L1$Number), c("age", "sex", "Number")] + ptc_L1 <- split(ptc_L1[, c("age", "sex")], paste0("pop_", ptc_L1$Number)) + + p$util$gpop <- lapply(ptc_L1, function(pop) adjust_utility( + age = pop$age, + sex = pop$sex, + utilities = base_utility, + .patient_level = FALSE, + .p = p + )) + + } else { + # We will only include IPD from line 1, since the population + # norm is applied according to absolute model time rather than + # than time in state. We don't know which population they are + # in, so we will replicate for pop_0, pop_1 and pop_2. + ipd_L1 <- i$R_table_patientagesex$Line == 1 + p$util$gpop <- list() + p$util$gpop$pop_0 <- adjust_utility( + age = i$R_table_patientagesex$Age[ipd_L1], + sex = if_else(i$R_table_patientagesex$Gender[ipd_L1] == "M", "male", "female"), + utilities = base_utility, + .patient_level = TRUE, + .p = p + ) + p$util$gpop$pop_1 <- p$util$gpop$pop_0 + p$util$gpop$pop_2 <- p$util$gpop$pop_0 + } +} else { + p$util$gpop <- list(pop_0 = 1, pop_1 = 1, pop_2 = 1) +} + +# Remove the object we made which is not used again +rm(base_utility) + +i$QALYs <- list() + +i$QALYs$utilities$means <- f_process_utilities(raw_utilities = i$R_table_util, + PSA = FALSE, + samples = FALSE) +# Sample code for PSA - temporariliy in file probabilistic_model_DEV.R + +# For the deterministic analysis, pass the means into p for use in the model. +# We now have our population norms for calculating gpop utility multiplier +# compared to baseline as well as our HSUVs by PLM and treatment status. everything +# we need. +# +p$util$mk <- data.table(i$QALYs$utilities$means) + + +# 3.4.3 AEs --------------------------------------------------------------- + +# the method to apply AEs (one-off or per cycle) is defined in the Excel inputs file +# this applies to both costs and QALYs + +# options related to the source of AE data are defined in the Excel workbook +# the functions below read in the information required to produce AE costs and utilities per cycle, this includes AE durations +# and the trial durations associated with the rates which are used to produce one off cost and QALY impacts + +p$ae$aetype <- i$dd_apply_AE_options + +# when one off we need trial duration: +p$ae$duration <- data.table(i$R_table_AE_rates) + +# produce the per cycle impact for each of the AEs + +p$ae$mk$per_cycle <- data.table(f_process_adverse_events( + AE_costs = i$R_table_AE_costs, + AE_disutil = i$R_table_AE_util, + AE_duration = i$R_table_duration, + AE_rate = i$R_table_AE_rates, + comparators = i$lookup$trt, + weeks_per_year = p$basic$cl_w / p$basic$cl_y, + PSA = FALSE +)) + +# they all match up with column RCC_input_desc from Excel except BSC (mol_999). +# Set BSC (assumed to have 0 AE impact): +p$ae$mk$per_cycle[trt == "BSC",]$trt <- i$lookup$ipd$mol[match(999,Number),]$RCC_input_desc + +# Convert to numbers. Now it's ready for use in the patient flow sheet. +p$ae$mk$per_cycle$molecule <- i$lookup$ipd$mol[match(p$ae$mk$per_cycle$trt,RCC_input_desc),]$Number + +# Add in the AE approach switch +p$ae$approach <- i$dd_apply_AE_options + +# 3.4.4 Costs ------------------------------------------------------------- + +# Drug and admin costs and MRU costs are considered within this section +# Drug and admin costs are applied per treatment per line (as drug costs may differ depending on what line treatment is used at) +# The impact of stopping rules is consider as part of the calculation of drug and admin costs rather than in determining whether +# patients in the on or off treatment health states +# MRU costs are applied per treatment and by on and off treatment status as the EAG was advised that MRU is the same across different lines +# When this model is expanded to a generic version flexibility to define per line will be added +# One off costs are included for treatment initiation at each line of treatment, terminal care at the end of life (applied on death) +# and progression (radiotherapy and surgery costs) + +i$cost <- list() + +# Put the deterministic drug cost inputs into p: +# +p$costs$mk <- f_process_cost_data( + drug_and_admin = i$R_table_drug_admin_costs, + per_cycle_costs = i$R_table_MRU, + time_horizon = p$basic$th, + max_trt_lines = p$basic$R_maxlines, + RDI_source = i$dd_sc_RDI, + verbose = FALSE, + PSA = FALSE, + samples = 1) + +# For PSA, You can pull psa iteration like this (done with random as test): +# psa_it <- round(runif(1)*1000,0) +# ooc_psa <- one_off_costs[,c("Type.of.cost", "Type", "Apply.to",paste0("V",psa_it)),with = FALSE] +# setnames(ooc_psa, paste0("V",psa_it),"cost") +# print(ooc_psa) +# +# Alternatively, passs the whole table into PSA version of model as it's small +# data. Can't do that for survival obviously, but we can for smaller data. +# +p$costs$oneoff <- f_process_other_cost_data( + one_off_costs = i$R_table_MRU_oneoff, + PSA = FALSE +) + +# pull out the individual inputs required for the ST model. In a PSA run +# this would be the nth iteration. +p$costs$oneoff_mk <- p$costs$oneoff[,.(cost = sum(cost)),by=list(Apply.to)] +p$costs$oneoff_mk <- lapply(structure( + p$costs$oneoff_mk$Apply.to, + .Names = p$costs$oneoff_mk$Apply.to +), function(x) { + p$costs$oneoff_mk[Apply.to == x, ]$cost +}) + +# These are then added to the first element of the cost vectors in the patient flow +# function. + +#holding line for PSA - this will be replaced by function sampling from means and SEs + +if (FALSE) { + i$cost$drug_and_admin_cost_by_tunnel_state$PSA <- f_process_drug_and_admin_cost_data( + raw_table = i$R_table_drug_admin_costs, + PSA_samples = TRUE) +} + + + +# 3.4.4 Subsequent treatment ------------------------------------------------------------- + +# Read in cost and QALY consequences for subsequent treatments per first line option from Excel +# This information is only used whaen the PartSA model structure is selected + +p$substrt$partsa <- as.data.table(f_process_subs_txt_data( + subs_txt = i$R_table_sub_txt_cum_costs, + PSA = FALSE +)) + + +# 3.5 Population mapping -------------------------------------------------- + +# There are 6 populations. These are combinations of +# +# - the risk populations (3 of them) +# - whether or not patients have had prior adjuvant therapy with immune-oncology treatments (2 of them) +# +# This makes for a possible 6 combinations, which are mapped to each other +# in the excel table i$r_overall_lookup_pop +# +# Currently the treatment sequences are sorted into 4 different populations. (combination) +# Currently the final survival extrapolations are sorted into 3 different populations (risk) +# Currently the costs, QALYs and AEs are sorted into 3 different populations (risk) +# +# Fortunately this table allows us to simply refer to the appropriate populations +# whilst calculating the patient flow. As an example: +# +# for overall population 1: +# +# seq_pop <- p$basic$lookup$pop_map[1,]$Sequencing.population.number +# risk_pop <- p$basic$lookup$pop_map[1,]$Risk.population.number +# +# Then use seq_pop to pull sequences & risk_pop for everything else. +# +# + +p$basic$lookup$pop_map <- data.table(i$r_overall_lookup_pop) + +# SECOND NOTE: In later treatment lines the risk population is always 0. +# PLEASE READ To prevent replicating more and more data, the model simply +# pulls from (risk) population 0 for later lines within the function +# f_seq_extrapCollector, which "collects" the correct extrapolations. +# This simply pulls from (risk) population 0 if line is more than 1 +# +# THIS IS A MODELLING ASSUMPTION AS WE WERE INFORMED THAT RISK IS NOT +# MEASURED AT 2L AND PRIOR RISK STATUS DOES NOT IMPACT TREATMENT +# OUTSIDE OF PRIOR THERAPY RECEIVED. A FULLY GENERIC MODEL WOULD NOT MAKE THIS ASSUMPTION +# AND INSTEAD WOULD HAVE A LARGER TABLE IN THE EFFICACY SETTINGS +# SHEET COVERING THE WHOLE MATRIX OF POP/LINE +# + + + +# 3.6 PATIENT FLOW ------------------------------------------------------ +# Now that we have all of the disease evidence and modeled outcomes available, +# we can compute the disease model. In the deterministic case, this will simply +# be a trace for both the PS and Markov models. + +i$R_table_eff_data_settings <- data.table(i$R_table_eff_data_settings) +p$releff$excel_table <- i$R_table_eff_data_settings + + +if(!str_trim(i$dd_model_struct) %in% c("State transition", "Partitioned survival")) stop( + "The model structure is not one of the available options. These are 'State transition' and 'Partitioned survival'. Either code or the excel file are out of date or wrongly set up" +) + + +# The process of computing patient flow is as follows: +# +# - Create an empty object for housing the patient flow (a list) +# - Populate the trace with either disease modelling approach +# - Record metadata on the approach, as this determines what the trace looks like +# - Compute costs based on time in state +# - Compute QALYs based on time in state +# - Compute AEs based on time in state +# - Costs +# - QALYs +# - Return the populated pf list +# + + +pf <- list() +res <- list() + + +# 3.6.1 Run the model ----------------------------------------------------- + +# Running the model can mean two different structures, a markov model which +# we refer to as state transition or a partitioned survival approach. These +# both work using the same parameters object p. There is only one function +# to compute patient flow, f_pf_computePF. This takes several different arguments +# but the main one is p, or the parameters object to power the model with. +# Everything within f_pf_computePF uses objects which are within p, such that +# a copy of p could be saved as a file and as long as all functions have been +# defined and packages loaded the model can be run directly from there. + +# The only structures allowed are state transition and partitioned survival. error +# if it is not one of those: +stopifnot(p$basic$structure %in% c("State transition","Partitioned survival")) + +# Check the decision problem. If it's cabo+nivo only the first 3 overall +# populations are relevant as one cannot get cabo+nivo in pops 4-6 +if(p$basic$decision_problem == "cabo+nivo") { + p$basic$pops_to_run <- 1:3 +} else { + p$basic$pops_to_run <- NULL +} + +# populate the pf object irrespective of model structure or overall populations +# to include. +# +# Note that if you put n_cores as NULL or 1 then the model will run in sequence. + +# For QC, you can easily just save i and p as a file so all you need to do is +# load libraries to repeat the results and QC things: + +saveRDS(p,"./2_Scripts/standalone scripts/QC/p.rds") +saveRDS(i,"./2_Scripts/standalone scripts/QC/i.rds") +# +# If you run the model using scenario 1 (rather than base case scenario 0) +# It will be a partitioned survival model. in that case it's a good idea to +# save a backup of p and i under a different name so that you have an input +# set for the paritioned survival model to hand at any time. +# +# saveRDS(p,"./2_Scripts/standalone scripts/QC/p_PS.rds") +# saveRDS(i,"./2_Scripts/standalone scripts/QC/i_PS.rds") + +# p <- readRDS("./2_Scripts/standalone scripts/QC/p.rds") +# i <- readRDS("./2_Scripts/standalone scripts/QC/i.rds") + +# Make this NA to run single core: +tick <- Sys.time() +pf <- f_pf_computePF( + p = p, + struct = p$basic$structure, + verbose = FALSE, + plots = FALSE, + just_pop = p$basic$pops_to_run, + just_nlines = NULL, + just_seq = NULL +) +print(Sys.time() - tick) + + +# if (is.na(keep_free_cores)) { +# plan(sequential) +# } else { +# plan(multisession(workers = max(availableCores()-keep_free_cores,1))) +# } + + + + +# 3.6.2 Compiling model results ------------------------------------------- + +# Depending on which model structure is used to compute patient flow, the results +# are processed in different ways. The below function simply routes the pf object +# to the appropriate function to process the results, and returns an object +# res containing the results: + +Scenario_number <- i$R_Scenario_num + +if(Scenario_number == 0) {detail_level <- 5} else {detail_level <- 4} +# Detail level guide: + +# State transition +# 1 is top line results by sequence (costs, QALYs, LYs) and weighted average results +# 2 is just the top line table and non-dominated weighted average incremental results +# 3 includes weighted average pairwise comparisons +# 4 includes incremental analysis by sequence +# 5 includes trace plots + +# PartSA analysis +# 1 is top line results for the PartSA analysis (costs, QALYs, LYs) +# 2 is incremental analysis as well +# 3 includes breakdown tables as well +# 4 includes state residency plots + +res <- f_res_compute_results( + pf = pf, + structure = p$basic$structure, + p = p, + detail_level = detail_level, + vs_mol = 1, + no_active_lines = i$R_max_trt_lines +) + + + + +# 3.6.3 Calculate severity modifier ----------------------------------- + +# The severity modifier for the weighted average first-line treatment comparison +# uses the best available treatment which is not nivo cabo (molecule 1) for the +# first 3 populations. +# +# This is because this is the best (i.e. most discounted QALYs) available 1L trt +# pathway set. +# +# Calculation is only provided for the state transition model + + +if (p$basic$structure == "State transition") { + population_numbers <- if(sum(p$basic$pops_to_run == 1:3)>0){1:3} else{1:6} + res$mk$qaly_shortfall_1_to_3 <- lapply(population_numbers, function(npa_pop) { + + lu_pop <- p$basic$lookup$pop_map + lu_rpop <- p$basic$lookup$ipd$pop + + # npa_pop is overall population, we need to look up risk population from it: + + risk_pop_n <- lu_pop[match(npa_pop,lu_pop$Overall.population.number),]$Risk.population.number + risk_pop <- lu_rpop[match(risk_pop_n,lu_rpop$Number),]$Description + + i$R_table_ptchar <- as.data.table(i$R_table_ptchar) + + if (i$dd_age_sex_source == "Mean") { + + # So for this risk population, we need the baseline characteristics: + bl_chars <- i$R_table_ptchar[Population == risk_pop & Treatment.line == 1,] + bl_age <- bl_chars$Starting.age..years..Mean + bl_male <- 1-bl_chars$Starting...female.Mean + + } else { + + patient_sex_age_IPD <- as.data.table(i$R_table_patientagesex) + patient_sex_age_IPD$Gender <- replace(patient_sex_age_IPD$Gender, patient_sex_age_IPD$Gender=="M","male") + patient_sex_age_IPD$Gender <- replace(patient_sex_age_IPD$Gender, patient_sex_age_IPD$Gender=="F","female") + + bl_age <- patient_sex_age_IPD[Line ==1]$Age + bl_male <- patient_sex_age_IPD[Line ==1]$Gender + + } + + pna_txt <- names(res$wa_summarised)[npa_pop] + + tab <- res$wa_summarised[[pna_txt]][L1 != 1,] + + met <- tab[which.max(qalys),] + + q_met <- met$qalys + comp_no_met <- met$L1 + + out <- calc_severity_modifier( + age = bl_age, + sex = bl_male, + .patient_level = if(i$dd_age_sex_source == "Mean") {FALSE} else {TRUE}, + qalys = q_met, + .i = i, + .p = p + ) + + out <- cbind(out, SOC = comp_no_met) + + return(out) + +}) +} + + + + +# 3.6.4 Saving the results ------------------------------------------------------ + +# the results are in list format so should be saved as an R list file (.rds) +# The naming of the file should reflect the model structure used. The file +# produced has a time stamp to avoid overwriting previous results files. + +Scenario_name <- i$R_Scenario_name # Use ST for state transition, PS for Partitioned survival, LP for list price, cPAS for cPAS +Scenario_number <- i$R_Scenario_num +Run_date <- date() +if (p$basic$structure == "State transition") { + saveRDS(res, paste0("./4_Output/ST_Scenario ",Scenario_number,"_",i$dd_drug_price_options,gsub(":","_",Run_date),".rds")) +} else { + saveRDS(res, paste0("./4_Output/PartSA_Scenario ",Scenario_number,"_",i$dd_drug_price_options,gsub(":","_",Run_date),".rds")) +} + + + + + + + + + + + + +# 3.6.5 Outputting results to Word ------------------------------------------------------ + +# Outputting the results to word requires a series of functions to produce component +# parts to go in the report, and then consolidating them all into the requested output. + +# Get the functions to produce the word document output: + +# Produce an automatically generated word document with required results tables +# and automatically name it using the variables that are already inside of +# i and p (including structure, which is located in p$basic$structure. Note +# that this comes from i$dd_model_struct, i.e. named range dd_model_struct from excel. +# please ensure that this is updated for PS model scenarios): + +Word_width_inches <- 29.7*0.3937 + +f_res_ProduceWordDoc( + p = p, + res = res, + Scenario_name = i$R_Scenario_name, + Scenario_number = i$R_Scenario_num, + price_options = i$dd_drug_price_options, + Run_date = Run_date, + word_template_location = "./3_Functions/reporting/empty results doc.docx", + Word_width_inches = 29.7*0.3937, + auto_save = TRUE, + verbose = TRUE +) + + + + +# END OF CODE ------------------------------------------------------- + + +#### Additional changes were originally planned during Phase 2 of this pilot following use for the initial decision problem including +# - Addition of Shiny user interface +# - Genericisation of the code to allow wider use +# - Programming and analysis of model outputs related specifically to sequencing, this may include value of information analyses + +# Unfortunately funding for this has not been confirmed currently. +# If you are interested in discussing or funding further development please contact the PenTAG team at pentag@exeter.ac.uk + + diff --git a/2_Scripts/Readme.Rmd b/2_Scripts/Readme.Rmd new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/2_Scripts/Readme.Rmd @@ -0,0 +1 @@ + diff --git a/2_Scripts/output_script.R b/2_Scripts/output_script.R new file mode 100644 index 0000000..5140368 --- /dev/null +++ b/2_Scripts/output_script.R @@ -0,0 +1,555 @@ +#### 1. Installation ########### +#### This code has been created using R version 4.3.1 +#### All packages used by this model are provided here + +#### Comment out the below section which installs the relevant packages after the first run of the model +# install.packages("shiny", quiet = TRUE) ### the quiet argument is used to avoid warnings appearing in the console (useful for later conversion to web app) +# install.packages("gtools", quiet = TRUE) +# install.packages("openxlsx", quiet = TRUE) +# install.packages("flexsurv", quiet = TRUE) +# install.packages("tidyverse", quiet = TRUE) +# install.packages("data.table", quiet = TRUE) +# install.packages("heemod", quiet = TRUE) +# install.packages("logOfGamma", quiet = TRUE) +# install.packages("ggplot2", quiet = TRUE) +# install.packages("survminer", quiet = TRUE) +# install.packages("officer", quiet = TRUE) +# install.packages("officedown", quiet = TRUE) +# install.packages("magrittr", quiet = TRUE) +# install.packages("Hmisc", quiet = TRUE) +# install.packages("future.apply", quiet = TRUE) +# install.packages("crosstable", quiet = TRUE) +# install.packages("flextable", quiet = TRUE) +# install.packages("stringr", quiet = TRUE) +# install.packages("BCEA", quiet = TRUE) +# install.packages("collapse", quiet = TRUE) +# install.packages("scales", quiet = TRUE) +# install.packages("Matrix", quiet = TRUE) +# install.packages("dplyr", quiet = TRUE) + +### Loading libraries + +#### This section needs to be run every time and calls each package from the library +library(shiny, quiet = TRUE) +library(gtools, quiet = TRUE) +library(openxlsx, quiet = TRUE) +library(flexsurv, quiet = TRUE) +library(tidyverse, quiet = TRUE) +library(data.table, quiet = TRUE) +library(heemod, quiet = TRUE) +library(logOfGamma, quiet = TRUE) +library(ggplot2, quiet = TRUE) +library(survminer, quiet = TRUE) +library(officer, quiet = TRUE) +library(officedown, quiet = TRUE) +library(magrittr, quiet = TRUE) +library(Hmisc, quiet = TRUE) +library(future.apply, quiet = TRUE) +library(crosstable, quiet = TRUE) +library(flextable, quiet = TRUE) +library(stringr, quiet = TRUE) +library(BCEA, quiet = TRUE) +library(collapse, quiet = TRUE) +library(scales, quiet = TRUE) +library(Matrix, quiet = TRUE) +library(dplyr, quiet = TRUE) + +#### 2. Loading functions ########### + + +# This variable is used throughout the model to define whether to provide additional outputs useful for QC or not +# The model will take longer to run when this is set to TRUE +qc_mode <- FALSE + + +# This function allows parallel processing (similar to future apply) +plan(multisession) + +options(crosstable_units="cm") + +# 2.1. Excel data extraction functions ----------------------------------------- + +#### These functions are used to extract parameters from the Excel input workbook for use in R +#### During Phase 2 a Shiny front-end will be added to the model which will allow an alternative mechanism to upload these types of inputs + +source(file.path("./3_Functions/excel/extract.R")) + +# 2.2. Treatment sequencing functions ---------------------------------------- + +#### Function: filter to active treatments and lines +##### Takes as an input the defined sequences, evaluation type and line to start the evaluation from +##### Other input is % receiving each subs therapy at each line dependent on previous treatments received +##### Reweights so that the % receiving each treatment sums to 100% within each arm / line being studied +##### Outputs a matrix that has the % receiving each possible combination + +source("./3_Functions/sequencing/sequences.R") + +# 2.3. Survival analysis functions --------------------------------------------- + +# Function: conduct survival analysis +##### by treatment, line, population and outcome fitted survival curves using Flexsurvreg (exp, Weibull, lognormal, loglog, Gompertz, gen gamma) +##### calculation of and adjustment for general population +##### adjustment for treatment effect waning + +source("./3_Functions/survival/Survival_functions.R") +source("./3_Functions/survival/other_cause_mortality.R") +source("./3_Functions/survival/treatment_effect_waning.R") + +# 2.4 Misc functions ---------------------------------------------------------- + +### these functions enable smoother data cleaning and manipulation + +source("./3_Functions/misc/other.R") +source("./3_Functions/misc/shift_and_pad.R") +source("./3_Functions/misc/cleaning.R") + +# 2.4.1 Functions imposing list structures ----------------------------------- + +source("./3_Functions/misc/nesting.R") +source("./3_Functions/misc/discounting.R") +source("./3_Functions/misc/qdirichlet.R") +source("./3_Functions/misc/plotting.R") +source("./3_Functions/misc/structure.R") + + +# 2.5 Utility functions ------------------------------------------------------- + +source("./3_Functions/utility/age_related.R") +source("./3_Functions/costs_and_QALYs/utility_processing.R") + +# 2.6 AE functions -------------------------------------------------------- + +source("./3_Functions/adverse_events/AE_steps.R") + +# 2.7 Cost calculation functions -------------------------------------------- + +source("./3_Functions/costs_and_QALYs/cost_processing.R") + + +# 2.8 State transition modelling functions -------------------------------- + +source("./3_Functions/markov/markov.R") + +# 2.9 Patient flow functions ---------------------------------------------- + +source("./3_Functions/patient_flow/overarching.R") +source("./3_Functions/patient_flow/partitioned_survival.R") +source("./3_Functions/patient_flow/markov.R") +source("./3_Functions/patient_flow/drug_costs.R") +source("./3_Functions/patient_flow/hcru_costs.R") +source("./3_Functions/patient_flow/qalys.R") +source("./3_Functions/patient_flow/ae.R") + + + +# 2.10 Results processing functions --------------------------------------- + +source("./3_Functions/results/incremental_analysis.R") +source("./3_Functions/results/model_averaging.R") +source("./3_Functions/results/partitioned_survival.R") +source("./3_Functions/misc/severity_modifier.R") +source("./3_Functions/results/results_tables.R") + + +# 2.11 Office software outputs -------------------------------------------- + +source("./3_Functions/reporting/word_document_output.R") + +# 3. Model inputs structure -------------------------------------------------- + +# Model inputs should be in a list called i. This list then contains all of the +# inputs for the model, NOT the parameters used to calculate the model. In effect, +# this is a place to store all model information BEFORE it gets boiled down to +# what's needed to run 1 model. +# +# using i allows subsetting by categorisation, which makes things a lot easier +# to find and avoids all long variable names +# +# the structure of i should be by category. There are the following +# categories: +# +# dd - dropdown inputs taken from Excel +# i - parameter inputs taken from Excel +# r_ tables taken from Excel +# List, id and lookup - lists defined and used within the code +# basic - basic inputs (time horizon, cycle length, discount rates, so on so forth) +# surv - survival analysis inputs including raw data +# sequences and seq - inputs and outputs related to the possible sequences of treatments +# cost - drug and hcru costs. All costs are here to keep things together (dosing is not cost) +# util and QALYs - utility and QALY inputs +# misc - misc inputs e.g. graph labelling +# + +#### 3.1 Loading input parameters ########### + +# This model allows two possible structures to be analysed: state transition with a user definable number of lines +# with health states based on time to discontinuation (drug costs) and progression status (quality of life and movement +# between lines) and PartSA with 3 health states (pre-progression, post-progression and death) + +# During Phase 1 of this pilot we use the model to evaluate the decision problem for a single therapy +# (cabo+nivo, defined as molecule 1) starting at 1st line +# During Phase 2 we will adapt this code to evaluate the cost-effectiveness of sequences starting at a user-defined line + +# Inputs to this model need to be downloaded from NICEdocs + +User_types <- c("Submitting company", "NICE", "EAG", "Committee", "NHSE", "Clinical expert", "Patient expert", "Non-intervention stakeholder", "Public") + +# The submitting company are able to see their own CIC and AIC data (marked up blue / yellow in reporting but not anything else: green marking +# green marked data has been either be replaced with 0 [PAS discounts, RWE IPD] or dummy data) +# NICE users will be able to see everything +# Other users will not be able to see any marked data, this is replaced with dummy data + +# The way raw data is fed into the model currently works as follows +# Define the path to where the data file lives using the select file functionality + +# The model then processes the file the user selected + +# There are a number of files which contain raw or intermediate inputs: +# 1. The Excel user interface - this contains information from company data and the UK RWE +# 2. The proportional hazards NMA CODA RDS file - this contains information from company data +# 3. The fractional polynomials NMA RDS file - this contains information from company data +# 4. Either the raw data file containing the pseudo-IPD for all trials for survival analysis (RWE and company data included); or +# 5. The RDS output from the survival analysis using both RWE and company data + + + + +################# START REPORT GENERATION ####################### + +# note - change stem to wherever your sharepoint files are kept. Subsequent lines should work ok +# make sure you have saved the input files into your data folder and changed the names of these in the code below +# search for path to find input files + +stem <- "C:/Users/dl556/" + +rds_file_path <- paste0(stem,"University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files") +output_file_path <- "./4_Output" +scenario_files_path <- paste0(stem,"University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Inputs front end file/Scenario set up files") + +#get list of rds files +files <- list.files(rds_file_path) +files <- files[grepl("Scenario", files)] + +for (f in files) { + # for each rds - does the relevant word file exist? (i.e. already run). If yes skip. + # update list of output files + output_files <- list.files(output_file_path) + output_files <- output_files[grepl("Scenario", output_files)] + + # extract scenario number and PAS/list prices + scenario <- substr(f, 9, regexpr("price", f)[1]-2) + # check if word file already exists, if so skip + if (length(output_files)>0) { + if (sum(grepl(scenario, output_files)) > 0) { + cat(f,": word output report already exists. Skipping.\n") + next + } + } + #match to correct input file (scenario number and PAS/list) + scenario_nr <- as.numeric(substr(scenario,1,regexpr("_", scenario)[1]-1)) + price_type <- substr(scenario,regexpr("_", scenario)[1] + 1, nchar(scenario)) + + if (price_type == "List") { + price_directory <- "/List price" + } else if (price_type == "PAS") { + price_directory <- "/PAS price" + } else { + stop("Failed to itentify price type for scenario") + } + + excel_path <- paste0(scenario_files_path, price_directory,"/Scenario ", scenario_nr, ".xlsm") + + if (!file.exists(excel_path)) { + stop("Unable to find scenario inputs file ", excel_path) + } + + # The first part of this code pulls all of the named ranges from the excel workbook, expand the parameters table + + i <- f_excel_extract(excel_path, verbose = TRUE) + i <- c(i,f_excel_cleanParams(i$R_table_param)) + + + + # Set which decision problem to look at, initially functionality has been geared towards the decision problem for cabozantinib plus nivolumab + i$decision_problem <- "cabo+nivo" + + # We then create a place for identifiers. Adding in an object to i full of lookup tables makes automated translation + # possible even when one doesn't know the number of items ex ante, or how they combine. + # + # If the lookup table is correct one can translate id numbers to text strings which are + # consistent throughout the entire model. This is extremely useful as the model can + # be expanded to any number of treatments and potentially even any number of lines + # (up to a reasonable maximum) + + i$id <- list(ipd = list()) + i$lookup <- list(ipd = list()) + + # Add distribution names to i + # This model only includes standard parametric distributions as more complex distributions were not deemed to be required for the included treatments + + i$distnames <- + c( + gengamma = "gengamma", + exp = "exp", + weibull = "weibull", + lnorm = "lnorm", + gamma = "gamma", + gompertz = "gompertz", + llogis = "llogis" + ) + + + + # The next step is to then "tidy up" i into another object, p. p doesn't necessarily + # have to house everything, only things that will change in PSA + + # PSA is not yet included in this model, this will be added during technical engagement + + # For the future: this is what a PSA would look like. Note that each p in p_psa + # needs to pull through one probabilistic iteration of all parameters. + # + # future_lapply(1:n_psa, function(psa_iteration) { + # generate_pf(p_psa[[psa_iteration]], structure=i$...) + # }) + # + + + p <- f_misc_param_generate_p(i) + + + # Pass this into p so that p can be used to exclusively compute the model: + p$basic$decision_problem <- i$decision_problem + + + + + i$surv <- list() + + #### Read in survival data from Excel workbook + + # Pull out the raw data from the IPD excel book - one named range per treatment at each line + # Each reference curve is defined in Excel as time (weeks), event/censor (event coded as 1, censor as 0), patient group, line, molecule, trial and endpoint + # Pull all of the named ranges from the excel workbook, expand the parameters table + + excel_path2 <- "./1_Data/IPD_Confidential _Rinput.xlsx" + + wb <- f_excel_extract(excel_path2, verbose = TRUE) + + i$surv$pld <- as.data.table(wb$`_xlnm._FilterDatabase`) + + rm(wb) + + # Some small cleaning of the PLD. + i$surv$pld <- i$surv$pld[,list(population,line,molecule,trial,endpoint,timew,event_censor)] + + # Do not allow zero survival times, they have to be at least 1 day. the TUotA is + # weeks, so 1 day is 1/7 weeks: + i$surv$pld[timew ==0,"timew"] <- 1/7 + + # The named range r_pld has numeric identifiers for: + # + # - pop + # - line + # - mol (i.e., regimen - combination therapies are under the same number) + # - trial (trial id WITHIN population line and molecule to set them apart from each other - usually just 1!) + # - endpoint + + # These numeric identifiers are then used to create a nested list of survival regression models and + # extrapolations. The extrapolations are filtered down to the extrapolations that are selected + # within the excel input sheet, but the rest are kept here in i in case of scenario analysis. + # + # Note that the lookup tables in the next section are used to translate these numbers + # into human-readable identifiers. + + # 3.3.2 Data identification ------------------------------------------ + + # There is a lot of nesting involved in this part of the analysis, with population line, regimen trial and endpoint + # making a total of 5 layers of nesting to automatically go through each endpoint for each trial for + # each regimen for each line for each population, perform all regression analyses, produce parameters + # and have an easily identifiable (and therefore programmable) spaces for the results of each analysis + # which can then be spat out into reporting. + + # The first step is to break up r_pld into separate datasets depending on the identifiers. A function + # is used to do this which returns nothing if such data for one id set doesn't exist. + # + # Note that at this stage it is just those contexts which HAVE got PLD which are to be organised. + # For those endpoints and so on that do not have data, a separate step after this one to populate + # every endpoint for every treatment line for every treatment sequence is performed. + + i$id$ipd <- list( + pop = i$r_pld_lookup_pop$Number[!is.na(i$r_pld_lookup_pop$Number)], + line = i$r_pld_lookup_line$Number[!is.na(i$r_pld_lookup_line$Number)], + mol = i$r_pld_lookup_mol$Number[!is.na(i$r_pld_lookup_mol$Number)], + trial = i$r_pld_lookup_trial$Number[!is.na(i$r_pld_lookup_trial$Number)], + endpoint = i$r_pld_lookup_endpoint$Number[!is.na(i$r_pld_lookup_endpoint$Number)] + ) + + names(i$id$ipd$pop) <- paste0("pop_" , i$id$ipd$pop) + names(i$id$ipd$line) <- paste0("line_" , i$id$ipd$line) + names(i$id$ipd$mol) <- paste0("mol_" , i$id$ipd$mol) + names(i$id$ipd$trial) <- paste0("trial_" , i$id$ipd$trial) + names(i$id$ipd$endpoint) <- paste0("endpoint_", i$id$ipd$endpoint) + + + # to see this, we have: + #i$id$ipd + + # Generating the same structure but with the translation table from number to + # text: + + i$lookup$ipd <- list( + pop = data.table(i$r_pld_lookup_pop)[Description != 0], + line = data.table(i$r_pld_lookup_line)[Description != 0], + mol = data.table(i$r_pld_lookup_mol)[Description != 0], + trial = data.table(i$r_pld_lookup_trial)[Description != 0], + endpoint = data.table(i$r_pld_lookup_endpoint)[Description != 0] + ) + + # For treatment line, add a translator for the column in the sequences output: + + i$lookup$ipd$line$seq_col <- paste0("V",2:(nrow(i$lookup$ipd$line)+1)) + i$lookup$ipd$line$R_id <- paste0("line_",1:nrow(i$lookup$ipd$line)) + + i$lookup$dist <- i$r_pld_lookup_dist + + + # This means that you can easily look up things like so: + + # i$lookup$ipd$mol[Number == 1,list(Description,RCC_input_desc)] + # i$lookup$ipd$mol[Number == 2,list(Description,RCC_input_desc)] + # i$lookup$ipd$line[Number == 1,list(Description,RCC_input_desc)] + # i$lookup$ipd$pop[Number == 0,list(Description,RCC_input_desc)] + + # One can also do the opposite, translating input file descriptions into numbers: + + # i$lookup$ipd$mol[RCC_input_desc == "ipi_nivo",list(Description,Number)] + + i$lookup$trt <- i$lookup$ipd$mol$Number + names(i$lookup$trt) <- i$lookup$ipd$mol$RCC_input_desc + names(i$lookup$trt)[length(i$lookup$trt)] <- "BSC" + + # pass to p whenever i$lookup has been populated/updated. + p$basic$lookup <- i$lookup + + + p$basic$lookup$pop_map <- data.table(i$r_overall_lookup_pop) + + + res <- readRDS(paste0(rds_file_path,"/",f)) + + Scenario_number <- i$R_Scenario_num + + Scenario_name <- i$R_Scenario_name # Use ST for state transition, PS for Partitioned survival, LP for list price, cPAS for cPAS + + Run_date <- date() + + # 3.4.2 QALYs ------------------------------------------------------------- + + # Utilities are applied to the disease model by treatment by line and whether the patient is on or off treatment + # Age adjustment is conducted multiplicatively in line with DSU guidance using earlier defined patient characteristics for age and sex + + # Extracting from excel file + + p <- add_population_utility_params(p, psa = FALSE, .i = i) + + # Pre-calculate the population utility norms since they will be the same across + # all sequences (though may vary across populations), and store in p + + i$R_table_ptchar <- as.data.table(i$R_table_ptchar) + + base_utility <- data.frame(cycle = 0:p$basic$th, utility = 1) + if (i$dd_ageadjuutilities == "Yes") { + if (i$dd_age_sex_source == "Mean") { + # We find the row corresponding to line 1 for each relevant population + + # Do a lot of wrangling to get in the format we want... + ptc_L1 <- i$R_table_ptchar[Treatment.line == 1, c(1, 3, 4)] + colnames(ptc_L1) <- c("Population", "age", "sex") + ptc_L1$sex <- 1 - ptc_L1$sex + ptc_L1 <- merge(ptc_L1, i$lookup$ipd$pop, by.x = "Population", by.y = "Description") + ptc_L1 <- ptc_L1[order(ptc_L1$Number), c("age", "sex", "Number")] + ptc_L1 <- split(ptc_L1[, c("age", "sex")], paste0("pop_", ptc_L1$Number)) + + p$util$gpop <- lapply(ptc_L1, function(pop) adjust_utility( + age = pop$age, + sex = pop$sex, + utilities = base_utility, + .patient_level = FALSE, + .p = p + )) + + } else { + # We will only include IPD from line 1, since the population + # norm is applied according to absolute model time rather than + # than time in state. We don't know which population they are + # in, so we will replicate for pop_0, pop_1 and pop_2. + ipd_L1 <- i$R_table_patientagesex$Line == 1 + p$util$gpop <- list() + p$util$gpop$pop_0 <- adjust_utility( + age = i$R_table_patientagesex$Age[ipd_L1], + sex = if_else(i$R_table_patientagesex$Gender[ipd_L1] == "M", "male", "female"), + utilities = base_utility, + .patient_level = TRUE, + .p = p + ) + p$util$gpop$pop_1 <- p$util$gpop$pop_0 + p$util$gpop$pop_2 <- p$util$gpop$pop_0 + } + } else { + p$util$gpop <- list(pop_0 = 1, pop_1 = 1, pop_2 = 1) + } + + i$QALYs <- list() + + i$QALYs$utilities$means <- f_process_utilities(raw_utilities = i$R_table_util, + PSA = FALSE, + samples = FALSE) + # Sample code for PSA + + if (FALSE) { + i$QALYs$utilities$PSA <- f_process_utilities(i$R_table_util, + PSA = TRUE, + samples = 100) + } + + # For the deterministic analysis, pass the means into p for use in the model. + # We now have our population norms for calculating gpop utility multiplier + # compared to baseline as well as our HSUVs by PLM and treatment status. everything + # we need. + # + # For probabilistic settings we'd need to pass along the nth iteration of i$QALYs$utilities$PSA + # into the nth element in p_psa (i.e. copy of p). Due to size, this may need to be done + # one iteration at a time, in which case p_psa <- p and then replacing elements, then + # running the model saves a lot of memory. + p$util$mk <- data.table(i$QALYs$utilities$means) + + f_res_ProduceWordDoc( + p = p, + res = res, + Scenario_name = i$R_Scenario_name, + Scenario_number = i$R_Scenario_num, + price_options = i$dd_drug_price_options, + Run_date = date(), + word_template_location = "./3_Functions/reporting/empty results doc.docx", + Word_width_inches = 29.7*0.3937, + auto_save = TRUE + ) + +} + +# END OF CODE ------------------------------------------------------- + + +#### A note on planned model changes +# This model is currently undergoing internal and external QC via the NICE DSU +# The following changes are planned to the code following technical engagement: +# - Incorporation of PSA +# - Incorporation of functionality to allow scenario analyses around the impact of prior adjuvant IO treatment + + +#### Additional changes will be made during Phase 2 of this pilot following use for the initial decision problem including +# - Addition of Shiny user interface +# - Genericisation of the code to allow wider use +# - Programming and analysis of model outputs related specifically to sequencing, this may include value of information analyses + + diff --git a/2_Scripts/probabilistic_model.R b/2_Scripts/probabilistic_model.R new file mode 100644 index 0000000..3638a8f --- /dev/null +++ b/2_Scripts/probabilistic_model.R @@ -0,0 +1,939 @@ + +# The primary difference with the probabilistic model is that the survival +# analysis (along with several of the other input parameters) are drawn +# probabilistically repeatedly and those randomly drawn numbers +# are fed through the SAME functions that have been used in Model_structure.R +# to ensure consistency. +# +# This is analogous to generating values in an excel model and passing it through +# the same calculation chain as the deterministic model via the patient flow +# sheet. +# +# To achieve this, we generate the list "p" repeatedly, with each "p" being +# a "copy" of the p used in the determinstic model, following exactly the same +# structure, but with random numbers instead of point estimate numbers each time. +# +# By running the same structure of p through the same function to run the model +# we can generate a probabilistic version of the model results, which can then +# be used to capture the uncertainty surrounding the ICER estimates. +# +# !!!PLEASE READ!!! THIS IS IMPORTANT: +# +# However, the ST model is highly computationally intensive. On a typical laptop, +# one run of the determinstic model across 3 different populations for ~150 pathways +# for 1-4 active treatment line on and off treatment tunnel states for 2000+ time cycles +# results in a lot of computations to get the most precise possible estimate +# of the time that patients will spend in each treatment line within a given +# pathway given a risk/sequencing population pairing. This is because it takes +# into account both absolute time and time within treatment line given treatment +# status, resulting in up to 18,000 discrete health states per treatment pathway. +# +# The 4 active treatment line plus BSC pathways have f_markov_calcN(4,p$basic$th) +# or 14619 discrete health states. This is 14619 * p$basic$th or 30,524,472 +# state residency calculations per treatment pathway. +# +# sequencing population 0 has p$seq$n$pop_0[!is.na(line_5),] (54) 4 active treatment +# line pathways, all plus BSC. 14619 * p$basic$th * 54 = 1,648,321,488 state +# residency values, just for overall population 1, and just for those pathways +# with 4 active treatment lines. The total number of values is much larger. +# +# As can be seen from this example, the reason that the model is so computationally +# intensive is the inclusion of the tunnel states for all 2L+ living health states. +# +# The computational time for each 4 ATL treatment pathway is around 2-3 seconds. +# Given a PSA of 1000 iterations, this would likely be something close to: +# +# 2.5 seconds * 54 pathways * 1000 +# +# which would be (2.5*54*1000)/60/60 = 37.5 hours, only for one population +# and only for the 4atl pathways in that population, and only to compute the Markov +# trace... +# +# On top of that, due to the large matrix required to incorporate full memory +# into a sequencing Markov model, the RAM requirement for such a process would +# be extreme. Currently, to run the model using 8 cores requires something in the +# region of 24GB of RAM, but of course considerably improves runtime. +# +# Consequently at this point there are three options which are available for +# generating probabilistic results from the state-transition model. These are: +# +# - Use a supercomputer. (or high-performance computing cluster, HPC) to provide +# hundreds of cores and TBs of RAM. This would reduce runtime to a few hours +# but requires outsourcing of the computations. There are many options available +# including commercial (Microsoft Azure, Amazon AWS), academic (Exeter university, +# Sheffield university, UEA, others) to satisfy this. This would produce +# the most accurate results and would ensure the use of identical code for +# determinstic and iterative analyses. +# - Approximate time in state using exponential approximation for the CURRENT +# decision problem. In this specific case, there are no stopping rules or +# complicated cost dynamics in any 2L+ state. Consequently the tunnels +# are not actually required in this specific simple case. We therefore +# define a non-tunnel trace calculator which operates with vastly reduced +# computation, resulting in a PSA taking a few seconds rather than multiple +# days. +# - Forego a probabilsitic analysis +# +# We would like to avoid not doing a probabilistic analysis, so options 1 and +# 2 are preferable. However, we cannot ensure that a HPC is available for running +# of this application. Therefore, we provide the alternative within this script +# for approximation. +# + + +# Packages and functions -------------------------------------------------- + +#library(shiny, quiet = TRUE) +library(gtools, quiet = TRUE) +library(openxlsx, quiet = TRUE) +#library(flexsurv, quiet = TRUE) +library(tidyverse, quiet = TRUE) +library(data.table, quiet = TRUE) +#library(heemod, quiet = TRUE) +#library(logOfGamma, quiet = TRUE) +library(ggplot2, quiet = TRUE) +library(survminer, quiet = TRUE) +library(officer, quiet = TRUE) +library(officedown, quiet = TRUE) +library(magrittr, quiet = TRUE) +library(Hmisc, quiet = TRUE) +library(future.apply, quiet = TRUE) +#library(crosstable, quiet = TRUE) +#library(flextable, quiet = TRUE) +library(stringr, quiet = TRUE) +#library(BCEA, quiet = TRUE) +library(collapse, quiet = TRUE) +library(scales, quiet = TRUE) +library(Matrix, quiet = TRUE) +library(progressr) +library(pracma) +library(optparse) + +parser_results <- OptionParser() |> + add_option(c("-k", "--job-id"), default = 1L, type = "integer", help = "The job ID in an array") |> + add_option(c("-N", "--iterations-per-job"), default = 2L, type = "integer", help = "The number of PSA iterations for this job") |> + parse_args() + +job_id <- parser_results$`job-id` +n_psa <- parser_results$`iterations-per-job` + +set.seed(job_id) + +# Multi-core processing: +# +# Instructions. +# +# This model is highly RAM intensive. You need a lot of RAM on your computer +# to run this model due to the large amount of very large matrix multiplications +# (up to approximately 15,000 discrete health states in the model). Therefore, +# in order to efficiently run the model, it is a balancing act between RAM +# usage and CPU usage. +# +# Some rough guidance is: +# +# - If you have 8GB of RAM on your computer, you can run this model with 2 cores only +# but it may even be faster to run in series if you have other things open on your +# computer at the same time. Therefore, please set keep_free_cores to NA and run +# the model in series. This is because when the RAM on your computer runs out +# your computer will use the hard-disk instead which is extremely slow. +# - If you have 16GB of RAM on your computer, parallel should be a lot faster. +# On my laptop (I7 8th gen, 16GB RAM, running Linux for low RAM usage) I can +# run with 5 cores whilst using about 12GB of RAM running this model. +# - if you have 24GB or 32GB of RAM, you should be able to run the model with 8 +# and up to around 14 cores before running out of RAM whilst running the model. +# - if you are using a HPC, you should be able to run this model with many cores +# due to the typically large amount of RAM available per core in a HPC +# +# +keep_free_cores <- NA +if (is.na(keep_free_cores)) { + plan(sequential) +} else { + plan(multisession(workers = max(availableCores()-keep_free_cores,1))) +} + +# Other generic settings for the progress bar and units for table widths +handlers("progress") +options(crosstable_units="cm") + +#### 2. Loading functions ########### + + +# This variable is used throughout the model to define whether to provide additional outputs useful for QC or not +# The model will take longer to run when this is set to TRUE +qc_mode <- FALSE + + + +# 2.1. Excel data extraction functions ----------------------------------------- + +#### These functions are used to extract parameters from the Excel input workbook for use in R +#### During Phase 2 a Shiny front-end will be added to the model which will allow an alternative mechanism to upload these types of inputs + +source(file.path("./3_Functions/excel/extract.R")) + +# 2.2. Treatment sequencing functions ---------------------------------------- + +#### Function: filter to active treatments and lines +##### Takes as an input the defined sequences, evaluation type and line to start the evaluation from +##### Other input is % receiving each subs therapy at each line dependent on previous treatments received +##### Reweights so that the % receiving each treatment sums to 100% within each arm / line being studied +##### Outputs a matrix that has the % receiving each possible combination + +source("./3_Functions/sequencing/sequences.R") + +# 2.3. Survival analysis functions --------------------------------------------- + +# Function: conduct survival analysis +##### by treatment, line, population and outcome fitted survival curves using Flexsurvreg (exp, Weibull, lognormal, loglog, Gompertz, gen gamma) +##### calculation of and adjustment for general population +##### adjustment for treatment effect waning + +source("./3_Functions/survival/Survival_functions.R") +source("./3_Functions/survival/other_cause_mortality.R") +source("./3_Functions/survival/treatment_effect_waning.R") +source("./3_Functions/misc/fpnma_fns.R") + +# 2.4 Misc functions ---------------------------------------------------------- + +### these functions enable smoother data cleaning and manipulation + +source("./3_Functions/misc/other.R") +source("./3_Functions/misc/shift_and_pad.R") +source("./3_Functions/misc/cleaning.R") + +# 2.4.1 Functions imposing list structures ----------------------------------- + +source("./3_Functions/misc/nesting.R") +source("./3_Functions/misc/discounting.R") +source("./3_Functions/misc/qdirichlet.R") +source("./3_Functions/misc/plotting.R") +source("./3_Functions/misc/structure.R") + + +# 2.5 Utility functions ------------------------------------------------------- + +source("./3_Functions/utility/age_related.R") +source("./3_Functions/costs_and_QALYs/utility_processing.R") + +# 2.6 AE functions -------------------------------------------------------- + +source("./3_Functions/adverse_events/AE_steps.R") + +# 2.7 Cost calculation functions -------------------------------------------- + +source("./3_Functions/costs_and_QALYs/cost_processing.R") + + +# 2.8 State transition modelling functions -------------------------------- + +source("./3_Functions/markov/markov.R") + +# 2.9 Patient flow functions ---------------------------------------------- + +source("./3_Functions/patient_flow/overarching.R") +source("./3_Functions/patient_flow/partitioned_survival.R") +source("./3_Functions/patient_flow/markov.R") +source("./3_Functions/patient_flow/drug_costs.R") +source("./3_Functions/patient_flow/hcru_costs.R") +source("./3_Functions/patient_flow/qalys.R") +source("./3_Functions/patient_flow/ae.R") + + + +# 2.10 Results processing functions --------------------------------------- + +source("./3_Functions/results/incremental_analysis.R") +source("./3_Functions/results/model_averaging.R") +source("./3_Functions/results/partitioned_survival.R") +source("./3_Functions/misc/severity_modifier.R") +source("./3_Functions/results/results_tables.R") + + + + +# PSA RELATED FUNCTIONS --------------------------------------------------- + +source("./3_Functions/psa/psa functions.R") + + + + +# i and p --------------------------------------------------------------- + +i <- readRDS("./2_Scripts/standalone scripts/QC/i.rds") +p <- readRDS("./2_Scripts/standalone scripts/QC/p.rds") + + +# Replace the components of p with their probabilistic versions, one section +# at a time, starting with the disease model: + +p$basic$jobid <- job_id +p$basic$npsa <- n_psa + +# Component generation ---------------------------------------------------- +# ~ Disease model --------------------------------------------------------- + +# To test - generate all of the parameters for all distributions for all reference +# curve PLMTEs for npsa PSA iterations +i$psa_psm <- f_PSA_drawFSParams( + surv_regs = i$surv$reg, + n_psa = p$basic$npsa, + return_rands = FALSE, + lookups = p$basic$lookup$ipd, + verbose = FALSE +) + +# Great, that works. Now let's introduce a version which filters down to only +# reference curves which are included in the analysis for THIS scenario (i.e. +# according to THIS excel file) + + +# filter down the parameters to only the distributions which have been +# selected in Excel, and remove all reference curves that aren't used in the +# model. +i$psa_psm_filtered <- f_psa_surv_params_filter( + psa_psm_param = i$psa_psm, + excel_eff_table = data.table(i$R_table_eff_data_settings), + lookups = p$basic$lookup +) + +i$psa_psm <- NULL + +# Generate lambdas for all +i$PSA_est_Lambdas <- f_psa_approx_lambda( + psa_params = i$psa_psm_filtered, + method = "sum", + th = p$basic$th +) + +i$PSA_est_Lambdas_disc <- f_psa_approx_lambda( + psa_params = i$psa_psm_filtered, + method = "sum", + th = p$basic$th, + disc = p$basic$discFacQ +) + +# So, when using a lambda computed to match the area under the curve of the +# discounted time in state plot, one comes to a similar time in state when +# just using the lambda approximation method, which is great + +# Using lambdas in the model ---------------------------------------------- + +# First, some simple maths: +# +# Lambda can be translated to transition probability TP like so: +# +# TP_t = 1-s(t)/s(t-1) +# +# Given that the rate is constant with exponential, all TP for a given endpoint +# are equal to TP_t, that is TP = 1-s(t)/s(t-1). +# +# At cycle 1 (given cycle 0 is model start), TP_t = 1-(s(t) / s(t-1)), but s(t-1) +# is known to be 1. Therefore TP = 1-s(1). +# +# Thus, the entire set of lambdas can be converted to TPs by cycling through them +# and applying 1-f_psa_exp(1,lambda) + +# Exponential lines ------------------------------------------------------- + +# The reference curves will all get exponential fits for undiscounted and +# discounted. This will be done using lambda. +# +# then, the relative efficacy network will be applied to them all to get +# the full st set +# +# Then, the "full set" of s(t) will be translated into TPs as normal +# +# Then, instead of M there will be TH small M's which will only have 4, 6, 8 or 10 +# as dim. +# +# These will be used to compute a trace for each PSA run, for each +# +# + +# All of these steps happen per PSA run, and the results that are returned are +# Markov traces. this way the individual traces can be calculated in parallel + +# PSA input set (temporarily just for disease model): +i_psa <- list( + ps = 1:p$basic$npsa, + releff = list( + coda = list( + ph = i$PHNMA$data, + fp = i$FPNMA$data + ), + table = p$releff$excel_table + ), + surv = list( + lambda = i$PSA_est_Lambdas, + lambda_dsic = i$PSA_est_Lambdas_disc, + ref_curves = f_psa_lambda2St(i$PSA_est_Lambdas,0:p$basic$th), + rc_params = i$psa_psm_filtered + ), + cost = list(), + hrql = list(), + ae = list() +) + +# At this point we have all the probabilistic reference curves as well as the CODA +# samples and the relative efficacy table. + +# excel_efficacy_table <- i$R_table_eff_data_settings + +i_psa$releff$table_hr <- f_psa_assumptionsTab_genHRs(excel_efficacy_table = i_psa$releff$table,p$basic$npsa) +i_psa$releff$table_noHR <- i_psa$releff$table[Include.in.this.analysis. == "Yes" & Effectiveness.data.source != "Apply HR to",] + +# Generate the network p$basic$npsa times (this is a big object, and uses a lot of RAM) + +i_psa$releff$blank_network <- f_NMA_generateNetwork(p$basic$id$ipd, p$basic$lookup$ipd) + +# P_PSA ------------------------------------------------------------------- + +# Generate a version of p which contains probabilistic versions of the inputs +# per iteration. Where things have no probabilistic uncertainty they can +# come from the base p as why generate that data thousands of times +# +# Everything in basic is base +# Demo gets generated +# +# +# Notes - +# - The cost object is large (around 3.4GB for 1000 iterations at 40 year TH) +# HOWEVER, we are doing lambda approximation so only require one number for +# all 2L+. This DRASTICALLY reduces the size of this object (to about 850MB) +# +p_psa <- list( + demo = lapply(p$demo$agg, function(popu) { + lapply(popu, function(li) { + lapply(li, function(category) { + if (is.null(category$mean)) { + return(category) + } else if (is.null(category$se)) { + rep(category$mean,p$basic$npsa) + } else { + rep(category$mean,p$basic$npsa) #### note we do not include uncertainty around the cohort makeup in PSA + } + }) + }) + }), + util = f_process_utilities( + raw_utilities = i$R_table_util, + PSA = TRUE, + samples = p$basic$npsa + ), + util_gpop_coefs = lapply(1:p$basic$npsa, function(nested_psa_iteration) { + .p <- add_population_utility_params(list(), psa = TRUE, .i = i) + .p$util$pop_norms + }), + costs = lapply(f_process_cost_data( + drug_and_admin = i$R_table_drug_admin_costs, + per_cycle_costs = i$R_table_MRU, + time_horizon = p$basic$th, + max_trt_lines = p$basic$R_maxlines, + RDI_source = i$dd_sc_RDI, + verbose = FALSE, + samples = p$basic$npsa, + PSA = TRUE) + ,f_psa_lambda_cost), + releff = list() +) + +# Pre-calculate the population utility norms since they will be the same across +# all sequences (though may vary across populations), and store in p + +base_utility <- data.frame(cycle = 0:p$basic$th, utility = 1) +if (i$dd_ageadjuutilities == "Yes") { + if (i$dd_age_sex_source == "Mean") { + # We find the row corresponding to line 1 for each relevant population + + # Do a lot of wrangling to get in the format we want... + ptc_L1 <- i$R_table_ptchar[Treatment.line == 1, c(1, 3, 4)] + colnames(ptc_L1) <- c("Population", "age", "sex") + ptc_L1$sex <- 1 - ptc_L1$sex + ptc_L1 <- merge(ptc_L1, i$lookup$ipd$pop, by.x = "Population", by.y = "Description") + ptc_L1 <- ptc_L1[order(ptc_L1$Number), c("age", "sex", "Number")] + ptc_L1 <- split(ptc_L1[, c("age", "sex")], paste0("pop_", ptc_L1$Number)) + + p_psa$util_gpop <- lapply( + X = 1:p$basic$npsa, + FUN = function(nested_psa_iteration) lapply(ptc_L1, function(pop) adjust_utility( + age = pop$age, + sex = pop$sex, + utilities = base_utility, + .patient_level = FALSE, + .p = + list( + basic = list(cl_y = p$basic$cl_y), + util = list(pop_norms = p_psa$util_gpop_coefs[[nested_psa_iteration]]) + ) + )) + ) + + } else { + # We will only include IPD from line 1, since the population + # norm is applied according to absolute model time rather than + # than time in state. We don't know which population they are + # in, so we will replicate for pop_0, pop_1 and pop_2. + ipd_L1 <- i$R_table_patientagesex$Line == 1 + p_psa$util_gpop <- lapply( + X = 1:p$basic$npsa, + FUN = function(nested_psa_iteration) { + pop_0 <- adjust_utility( + age = i$R_table_patientagesex$Age[ipd_L1], + sex = if_else(i$R_table_patientagesex$Gender[ipd_L1] == "M", "male", "female"), + utilities = base_utility, + .patient_level = TRUE, + .p = + list( + basic = list(cl_y = p$basic$cl_y), + util = list(pop_norms = p_psa$util_gpop_coefs[[nested_psa_iteration]]) + ) + ) + list( + pop_0 = pop_0, + pop_1 = pop_0, + pop_2 = pop_0 + ) + } + ) + } +} else { + p_psa$util_gpop <- lapply(1:p$basic$npsa, function(nested_psa_iteration) list(pop_0 = 1, pop_1 = 1, pop_2 = 1)) +} + + +# QC stuff - un-comment to investigate: + +# util[, .( +# mean_on = mean(OnTxt), +# lb_on = quantile(OnTxt, 0.025), +# ub_on = quantile(OnTxt, 0.975), +# mean_off = mean(OffTxt), +# lb_off = quantile(OffTxt, 0.025), +# ub_off = quantile(OffTxt, 0.975), +# mean_PFS = mean(PFS), +# lb_PFS = quantile(PFS, 0.025), +# ub_PFS = quantile(PFS, 0.975), +# mean_PD = mean(PD), +# lb_PD = quantile(PD, 0.025), +# ub_PD = quantile(PD, 0.975) +# ), +# by = list(Population, Treatment.line, Molecule)] + +# ~ FPNMA CODA sample ----------------------------------------------------- + +#read in FPNMA PSA parameters +p_psa$releff$PSAcoefficients <- read.csv("./1_Data/osipsa.csv") +p_psa$releff$PSAcoefficients <- rbind(p_psa$releff$PSAcoefficients , read.csv("./1_Data/osopsa.csv")) +p_psa$releff$PSAcoefficients <- rbind(p_psa$releff$PSAcoefficients , read.csv("./1_Data/pfipsa.csv")) +p_psa$releff$PSAcoefficients <- rbind(p_psa$releff$PSAcoefficients , read.csv("./1_Data/pfopsa.csv")) + +#tidy and add exponents +p_psa$releff$PSAcoefficients <- f_FPNMA_tidy_and_add_exponents( + PSAcoefficients = p_psa$releff$PSAcoefficients, + exponents = i$R_table_FPNMA_coefficients +) + +# The coefficients are used to generate HRs for each PSA iteration one at a time +# to avoid generating a huge amount of data, which will increase the RAM requirements +# of the PSA even more, slowing down the model considerably. + + +# ~ one-off costs --------------------------------------------------------- + +# Calculating PSA samples for one off costs + +Oneoffcost_dat <- data.table( + p$costs$oneoff, + SE_cost = as.numeric(data.table(i$R_table_MRU_oneoff)[Type.of.cost != "Treatment initiation\r\n",]$X5) +) + +p_psa$costs$oneoff <- rbindlist(lapply(1:nrow(Oneoffcost_dat), function(param_row) { + id <- Oneoffcost_dat[param_row,] + ooc_m <- id$cost + ooc_se <- id$SE_cost + id$cost <- NULL + id$SE_cost <- NULL + id <- as.data.table(lapply(id, function(x) rep(x,p$basic$npsa))) + id$iteration <- 1:p$basic$npsa + id$cost <- rnorm(p$basic$npsa,ooc_m,ooc_se) + return(id) +})) + +p_psa$costs$oneoff_mk <- p_psa$costs$oneoff[,.(cost = sum(cost)),by=list(Apply.to, iteration)] +Apply_to <- p$costs$oneoff[,.(cost = sum(cost)),by=list(Apply.to)] + +p_psa$costs$oneoff_mk <- lapply(structure( + Apply_to$Apply.to, + .Names = Apply_to$Apply.to +), function(health_state) { + p_psa$costs$oneoff_mk[Apply.to == health_state, ]$cost +}) + +# ~ AEs --------------------------------------------------------------------- + +# Calculating PSA samples for AEs +AE_dat <- data.table( + p$ae$mk$per_cycle, + SE_cost = p$ae$mk$per_cycle$cost * i$i_SE_costs, + SE_util = abs(p$ae$mk$per_cycle$QALYs * i$i_SE_util) +) + +p_psa$ae <- rbindlist(lapply(1:nrow(AE_dat), function(param_row) { + id <- AE_dat[param_row,] + aec_m <- id$cost + aec_se <- id$SE_cost + aeq_m <- id$QALYs + aeq_se <- id$SE_util + id <- id[,list(trt,line,molecule)] + id <- as.data.table(lapply(id, function(x) rep(x,p$basic$npsa))) + id$iteration <- 1:p$basic$npsa + id$cost <- rnorm(p$basic$npsa,aec_m,aec_se) + id$QALYs <- rnorm(p$basic$npsa,aeq_m,aeq_se) + return(id) +})) + + +# Dropping some temp stuff: +rm(AE_dat, Oneoffcost_dat, Apply_to) + + +# Minor prep functions for the patient flow calcs ------------------------- + + + + +# PSA trace process ------------------------------------------------------- + + +# The pre-generation method is extremely RAM intensive, with the network +# taking up 11GB of memory before even extrapolating the curves. +# +# instead, it may be more optimal to produce st on the PSA level, so we +# here attempt to go all the way to that point: +tick <- Sys.time() + +psa_results <- with_progress({ + pr <- progressr::progressor(along = i_psa$ps) + rbindlist(future_lapply(i_psa$ps, future.seed = TRUE, future.chunk.size = 1, function(nested_psa_iteration) { + + psa_iteration <- nested_psa_iteration + (job_id - 1) * n_psa + + # PHNMA CODA sample: + phnma_coda_run <- data.table(data.frame(i_psa$releff$coda$ph)[i_psa$releff$coda$ph$Run == psa_iteration,]) + + # FPNMA CODA sample and HR extrapolation: + p_psa$releff$CODA$FP <- f_generate_FPNMA_coda( + coeffs = p_psa$releff$PSAcoefficients[run == psa_iteration, ], + TH = p$basic$th, + wks_per_month = i$i_mon_to_weeks + ) + + #add in deterministic 2L coda to PSA (PSA only conducted on 1L due to unstable 2L results in FPNMA) + i$FPNMA$means_for_PSA_L2 <- i$FPNMA$means[Line==2] + p_psa$releff$CODA$FP <- rbind(p_psa$releff$CODA$FP, i$FPNMA$means_for_PSA_L2) + + #rebase to add in cabo as 2L + p_psa$releff$CODA$FP <- f_3L_rel_effect_same_as_2L(FPNMAdata = p_psa$releff$CODA$FP) + p_psa$releff$fp_dest <- f_gen_destinations(fp_data = p_psa$releff$CODA$FP) + + # add in reference.trial 2 + p_psa$releff$CODA$FP <- f_add_reference_trial_2(fp_data = p_psa$releff$CODA$FP) + + #eliminate NAs in molecule + p_psa$releff$fp_dest <- i$FPNMA$destinations[!is.na(Molecule), ] + + pr(paste0("PSA iteration #",psa_iteration)) + # Make empty network + network <- f_NMA_linkPHNMA( + network = i_psa$releff$blank_network, + hr_table = phnma_coda_run + ) + + # TEMPORARY: put in the FPNMA sample + network <- f_NMA_linkFPNMA( + network = network, + destinations = p$releff$fp_dest, + hr_table = p$releff$CODA$FP, + time_horizon = p$basic$th + ) + + network <- f_NMA_AddAssumptionsToNetwork( + network = network, + phnma_table = phnma_coda_run, + fpnma_table = p$releff$CODA$FP, + fpnma_destinations = p$releff$fp_dest, + excel_table = i_psa$releff$table, + trial_flag = i$List_eff_datasources[1], + fpnma_flag = i$List_eff_datasources[3], + phnma_flag = i$List_eff_datasources[2], + et_flag = i$List_eff_datasources[4], + ahr_flag = i$List_eff_datasources[5], + verbose = FALSE, + psa_flag = TRUE + ) + + # We are not going to keep the network. We are going to use it to propagate + # and then return only the st object for efficiency + st <- f_releff_PropNetwork( + network = network, + extraps = i_psa$surv$lambda, + dos = 10, + verbose = FALSE, + dist_lookups = p$basic$lookup$dist, + excel_table = i_psa$releff$table, + psa_lambda_flag = TRUE, + psa_iteration = nested_psa_iteration, + psa_params = i_psa$surv$rc_params, + th = p$basic$th + ) + + # Now that S(t) is calculated for this PSA iteration for all PLMTEs, we can now adjust + # all the curves ready for computation of the Markov traces + + # Commented out as adjuvant adjustment not used in base case / PSA + + # if(i$dd_adjforprioradjuvant == "Yes") { + # p$surv$st <- f_surv_adjuvant_HR( + # st = st, + # adjuvant_impact = i$R_table_prior_IO_impact_eff, + # demo_table = p$demo$table, + # lookup = p$basic$lookup, + # verbose = TRUE) + # } + + + if (sum(i$R_table_TE_waning_settings$apply.waning == "Yes") > 0) { + st <- f_surv_twaning_apply( + st_list = st, + tab_waning = data.table(i$R_table_TE_waning_settings), + tab_eff_set = data.table(i$R_table_eff_data_settings), + verbose = FALSE + ) + } + st <- f_surv_gpopadjust( + st = st, + gpop = p$surv$gpop, + method = "hazardmax", + verbose = FALSE + ) + st <- f_surv_PFSxOS(st = st, method = if(i$dd_adj_cross_curves == "Use hazards"){"hazardmax"} else{"abs"}) + st <- f_surv_TTDxOS(st, if(i$dd_adj_cross_curves == "Use hazards"){"hazardmax"} else{"abs"}) + st <- f_surv_PFSxTTP(st = st,method = "abs") + st <- lapply(st, function(popu) { + popu$line_5 <- popu$line_4 + return(popu) + }) + + + # Now, we want to reduce the amount of data being stored as much as possible as + # the st object is huge. Each st is about 16MB, and 1000 PSA iterations is therefore + # 16GB! + # + # If we can re-compute lambdas for all 2L+ PLMTEs, we can collapse much of + # this data back to being one value from 2000+ This should scale up tremendously. + # + # To do this we will compute 2l+ lambdas and 1L (TP=1-(s(t)/s(t-1))) to reduce + # data need as much as possible! + + tp <- f_psa_collapse_st_lambda2lplus(st = st, th = p$basic$th, disc = FALSE) + + # So, with these tp objects we can now cycle through all the treatment pathways + # one at a time generating the TP matrices. these are time-varying because + # of first-line, but do not include tunnels so are drastically simplified compared + # to the full tunnels. + + # We now do some of the preamble for the deterministic model running in ST mode: + + struct = p$basic$structure + verbose = FALSE + plots = FALSE + just_pop = p$basic$pops_to_run + just_nlines = NULL + just_seq = NULL + + if (!is.null(just_pop)) { + if(0 %in% just_pop) stop ("this is overall population, not risk population, it can't be 0.") + overall_pops <- structure(paste0("pop_",just_pop),.Names=paste0("pop_",just_pop)) + } else { + overall_pops <- structure( + paste0("pop_",p$basic$lookup$pop_map$Overall.population.number), + .Names=paste0("pop_",p$basic$lookup$pop_map$Overall.population.number) + ) + } + rpop <- paste0("pop_",p$basic$lookup$pop_map[match(as.numeric(gsub("pop_","",overall_pops)),p$basic$lookup$pop_map$Overall.population.number),]$Risk.population.number) + + # We essentially compute the PF object in the same way as we did for the + # deterministic ST model, but using lambda approximation to simplify the computation + # of the Markov trace. this removes all tunnels from the model, reducing + # potentially 14,000+ health states to just 10. No sparse matrix multiplication + # is required either. + + PATIENT_FLOW <- f_psa_pf_computePF_mkLambda( + pops = overall_pops, + basic = p$basic, + demo = f_psa_get_it_demo(p_psa$demo, nested_psa_iteration), + sequences = p$seq, + survival = list(gpop = p$surv$gpop, tp = tp), + costs = list(per_cycle = p_psa$costs[[nested_psa_iteration]], one_off = lapply(p_psa$costs$oneoff_mk, "[[", nested_psa_iteration)), + util = list(hsuv = p_psa$util[iteration == nested_psa_iteration,], gpop = p_psa$util_gpop[[nested_psa_iteration]]), + ae = list(one_off = p$ae$duration, per_cycle = p_psa$ae[iteration == nested_psa_iteration], approach = p$ae$approach), + eff_table = p$releff$excel_table, + verbose = TRUE, + include_plots = FALSE, + just_nlines = NULL, + just_seq = NULL + ) + + # We summarise these individual patient flow objects: + res_undisc <- f_pf_mk_summary( + pf_list = PATIENT_FLOW, + disc_undisc = "undisc", + lookups = p$basic$lookup, + full_breakdown = TRUE, + breakdown = TRUE, + ypc = p$basic$cl_y + ) + res_disc <- f_pf_mk_summary( + pf_list = PATIENT_FLOW, + disc_undisc = "disc", + lookups = p$basic$lookup, + full_breakdown = TRUE, + breakdown = TRUE, + ypc = p$basic$cl_y + ) + + # Cost per drug per sequence per population + + # Target - do this for all pathways! + empty_cost_mol_list <- paste0("mol_",p$basic$lookup$trt) + empty_cost_mol_list <- structure(numeric(length(empty_cost_mol_list)),.Names=empty_cost_mol_list) + + # undiscounted and discounted results by treatment pathway: + results_undsic <- lapply(res_undisc, function(popu) { + fbd <- popu$full_breakdowns + + # Get the drug costs per molecule per sequence: + trt_list_L1 <- unlist(lapply(fbd, function(trt_sq) trt_sq$numb[1])) + trt_list <- lapply(fbd, function(trt_sq) paste0("mol_",trt_sq$numb)) + cost_list <- lapply(fbd, function(trt_sq) trt_sq$cost[,"drug"]) + drug_cost <- data.table(do.call( + rbind, + lapply(structure(1:length(cost_list),.Names=names(trt_list)), function(trt_sq) { + cl <- cost_list[[trt_sq]] + names(cl) <- trt_list[[trt_sq]] + empty_cost_mol_list[names(cl)] <- cl + return(empty_cost_mol_list) + }) + )) + + # now we need other costs, which are nicely in breakdowns + bdt <- cbind(popu$breakdowns,drug_cost) + bdt$drug <- NULL + bdt$L1 <- trt_list_L1 + + bdt[,`:=`(other_costs = admin + mru_on + mru_off + ae_cost + eol, qaly = qaly + ae_qaly)] + bdt[,`:=`(admin = NULL, mru_on = NULL, mru_off = NULL, ae_cost = NULL, eol = NULL, ae_qaly = NULL)] + + # Now we just need life years and weightings: + lybd <- popu$ly$breakdown[,.(LY = sum(L1_on,L1_off,BSC,L2_on,L2_off,L3_on,L3_off,L4_on,L4_off,na.rm = TRUE)),by="trt"] + + model_breakdown <- merge.data.table(bdt,lybd) + rm(bdt) + rm(lybd) + + # Now we have all the results we need for this PSA iteration consolidated + # together in one table. However, it's too granular and we haven't + # merged in the weightings yet. + model_breakdown + + }) + results_disc <- lapply(res_disc, function(popu) { + fbd <- popu$full_breakdowns + + # Get the drug costs per molecule per sequence: + trt_list_L1 <- unlist(lapply(fbd, function(trt_sq) trt_sq$numb[1])) + trt_list <- lapply(fbd, function(trt_sq) paste0("mol_",trt_sq$numb)) + cost_list <- lapply(fbd, function(trt_sq) trt_sq$cost[,"drug"]) + drug_cost <- data.table(do.call( + rbind, + lapply(structure(1:length(cost_list),.Names=names(trt_list)), function(trt_sq) { + cl <- cost_list[[trt_sq]] + names(cl) <- trt_list[[trt_sq]] + empty_cost_mol_list[names(cl)] <- cl + return(empty_cost_mol_list) + }) + )) + + # now we need other costs, which are nicely in breakdowns + bdt <- cbind(popu$breakdowns,drug_cost) + bdt$drug <- NULL + bdt$L1 <- trt_list_L1 + + bdt[,`:=`(other_costs = admin + mru_on + mru_off + ae_cost + eol, qaly = qaly + ae_qaly)] + bdt[,`:=`(admin = NULL, mru_on = NULL, mru_off = NULL, ae_cost = NULL, eol = NULL, ae_qaly = NULL)] + + # Now we just need life years and weightings: + + model_breakdown <- bdt + rm(bdt) + + # Now we have all the results we need for this PSA iteration consolidated + # together in one table. However, it's too granular and we haven't + # merged in the weightings yet. + model_breakdown + + }) + + # These two tables are by drug costs, other costs, QALYs, LYs and some ids. + # We can cycle through the different overall populations and merge it all together + # into one big table for this PSA iteration. This table then gets merged with + # the tables for all the other iterations into one table containing all the + # necessary PSA results for post evaluation. + pop_lab <- names(results_disc) + names(pop_lab) <- pop_lab + return(rbindlist(lapply(pop_lab, function(pop_name) { + tab_with_ly <- merge.data.table(results_disc[[pop_name]],results_undsic[[pop_name]][,list(trt_n,LY)]) + tab_with_ly$oo_pop <- as.numeric(gsub("pop_","",pop_name)) + tab_with_ly$iteration <- psa_iteration + tab_with_ly + }))) + })) +}) +tock <- Sys.time() - tick +print(tock) + +# Save the full table which contains results for ALL individual treatment pathways: +saveRDS(psa_results, paste0("./4_Output/PSA_output_",job_id,gsub(":","_",date()),".rds")) + + + +# Model averaging --------------------------------------------------------- + +# Now that the PSA has finished for all individual pathways for all PSA iterations, +# we can use that along with information on the market shares of different +# subsequent treatments, AND the uncertainty around those to generate +# a probabilistic weighted average model results representing ALL treatment pathways +# that is consolidated by first-line therapy relevant to this decision problem. + + +# Note - the Exeter HPC cannot deal with the character "→" and encodes it as +# correct encoding error in output from HPC. It's fine to leave this uncommented +# as if the command doesn't find the character it won't correct it. +psa_results$trt_n <- gsub("b\006\022","\u2192",psa_results$trt_n) +psa_results$trt <- gsub("b\006\022","\u2192",psa_results$trt) + +# Compute weighted average summary results, including weightings, weighted, weighted average +# and mean weighted average results: +wa_model <- f_psa_computeWAModelRes( + R_table_sub_txts_prop_n_costs = i$R_table_sub_txts_prop_n_costs, + sims = n_psa, + lookups = p$basic$lookup, + psa_results = psa_results, + PSA = TRUE +) + +saveRDS(wa_model, paste0("./4_Output/PSA_output_weighted_",job_id,gsub(":","_",date()),".rds")) + + + diff --git a/2_Scripts/process PSA output.R b/2_Scripts/process PSA output.R new file mode 100644 index 0000000..e480d6c --- /dev/null +++ b/2_Scripts/process PSA output.R @@ -0,0 +1,224 @@ +#install.packages("devtools") +#devtools::install_github('Sheffield-Accelerated-VoI/SAVI-package') + + +library(BCEA) +library(SAVI) +library(dplyr) +library(ggplot2) +library(purrr) + +#run Model_structure.R 1-1801 to generate p +lu_mol = p$basic$lookup$ipd$mol + +source("./3_Functions/results/incremental_analysis.R") +wa_model <- readRDS("./4_Output/PSA_output_weighted_summaryThu Aug 24 10_59_57 2023.rds") + +results <- wa_model$weighted_average +results$cost <- apply(results[, c("mol_0", "mol_1", "mol_2", "mol_3","mol_4", "mol_5", + "mol_6", "mol_7", "mol_8", "mol_9", "mol_10", + "mol_11", "mol_12", "mol_999", "other_costs")],1, sum) + +results$trt <- NA + +results$trt[results$L1 == 1] <- "Cabozantinib plus nivolumab" +results$trt[results$L1 == 2] <- "Nivolumab plus ipilimumab" +results$trt[results$L1 == 3] <- "Lenvatinib plus pembrolizumab" +results$trt[results$L1 == 5] <- "Pazopanib" +results$trt[results$L1 == 6] <- "Tivozanib" +results$trt[results$L1 == 7] <- "Sunitinib" +results$trt[results$L1 == 8] <- "Cabozantinib" + + +results <- results[,c("dd_drug_price_options", "oo_pop", "L1", "trt", "iteration", "cost", "LY", "qaly")] + +#set up tables list +report_tables <- list() + +# read in formatted data +for (pricing in unique(results$dd_drug_price_options)) { + for (pop in unique(results$oo_pop)) { + results2 <- results[(results$dd_drug_price_option == pricing & + results$oo_pop == pop),] + + cost <- results2[,c(3,5,6)] + cost <- spread(cost, key = "L1", value = "cost") + cost <- cost[,-1] + + LY <- results2[,c(3,5,7)] + LY <- spread(LY, key = "L1", value = "LY") + LY <- LY[,-1] + + qaly <- results2[,c(3,5,8)] + qaly <- spread(qaly, key = "L1", value = "qaly") + qaly <- qaly[,-1] + + data <- list(raw = results2, + cost = cost, + LY = LY, + qaly = qaly) + rm(cost, LY, qaly) + + report_tables[[pricing]][[pop]] <- list(data = data) + names(report_tables[[pricing]])[pop] <- paste0("pop",pop) + } +} + +# create BCEA objects +for (pricing in unique(names(report_tables))) { + #pricing <- unique(names(report_tables))[1] + for (pop in unique(names(report_tables[[pricing]]))) { + #pop <- unique(names(report_tables[[pricing]]))[1] + interventions <- lu_mol[match(colnames(report_tables[[pricing]][[pop]][["data"]][["cost"]]), lu_mol$Number)]$Description + report_tables[[pricing]][[pop]][["bcea"]][["LY"]] <- + bcea(eff = as.matrix(report_tables[[pricing]][[pop]][["data"]][["LY"]]), + cost = as.matrix(report_tables[[pricing]][[pop]][["data"]][["cost"]]), + ref = 1, + interventions = interventions, + Kmax = 60000) + + report_tables[[pricing]][[pop]][["bcea"]][["qaly"]] <- + bcea(eff = as.matrix(report_tables[[pricing]][[pop]][["data"]][["qaly"]]), + cost = as.matrix(report_tables[[pricing]][[pop]][["data"]][["cost"]]), + ref = 1, + interventions = interventions, + Kmax = 60000) + } +} + +#create tables +for (pricing in unique(names(report_tables))) { + #pricing <- unique(names(report_tables))[1] + for (pop in unique(names(report_tables[[pricing]]))) { + #pop <- unique(names(report_tables[[pricing]]))[1] + + data <- report_tables[[pricing]][[pop]][["data"]] + totals <- cbind(t(rbind(apply(data$cost,2,mean), + apply(data$cost,2,quantile, probs = c(0.025, 0.975)))), + t(rbind(apply(data$LY,2,mean), + apply(data$LY,2,quantile, probs = c(0.025, 0.975)))), + t(rbind(apply(data$qaly,2,mean), + apply(data$qaly,2,quantile, probs = c(0.025, 0.975))))) + totals <- cbind(as.numeric(rownames(totals)), totals) + + colnames(totals) <- c("L1","Cost_mean","Cost_2.5","Cost_97.5", + "LY_mean","LY_2.5","LY_97.5", + "qaly_mean","qaly_2.5","qaly_97.5") + table_for_increments <- totals[,c("L1","Cost_mean","qaly_mean","LY_mean")] + colnames(table_for_increments) <- c("L1", "costs", "qalys", "ly") + table_for_increments <- as.data.table(table_for_increments) + + incremental <- f_res_mk_incremental(res_d = table_for_increments, + res_ud = table_for_increments, + lu_mol = lu_mol, + produce_plot = FALSE, + no_active_lines = 4, + output_weighted = "Yes")$non_dominated + + colnames(incremental)[colnames(incremental) == "L1"] <- "trt" + incremental$L1 <- lu_mol[match(incremental$trt, lu_mol$Description)]$Number + table_for_increments$trt <- lu_mol[match(table_for_increments$L1, lu_mol$Number)]$Description + table_for_increments <- merge(table_for_increments, incremental, all.x = TRUE) + table_for_increments <- table_for_increments[order(table_for_increments$costs),] + table_for_increments <- table_for_increments[,c("L1","trt", "costs", "qalys", "ly", "ic", "iq", "il", "ICER")] + + #for each increment calculate 95%CrI + #get comparisons + table_for_increments$il_ub <- table_for_increments$il_lb <- + table_for_increments$iq_ub <- table_for_increments$iq_lb <- + table_for_increments$ic_ub <- table_for_increments$ic_lb <- NA + + comparisons <- table_for_increments$L1[!is.na(table_for_increments$ic)] + + if (length(comparisons)<2) stop("Less than one valid incremental pair") + for (n in 2:length(comparisons)) { + inc <- data.frame(cost = data$cost[[which(comparisons[n] == names(data$cost))]] - + data$cost[[which(comparisons[n-1] == names(data$cost))]], + + ly = data$LY[[which(comparisons[n] == names(data$LY))]] - + data$LY[[which(comparisons[n-1] == names(data$LY))]], + + qaly = data$qaly[[which(comparisons[n] == names(data$qaly))]] - + data$qaly[[which(comparisons[n-1] == names(data$qaly))]]) + + if (round(mean(inc$cost),0) != round(table_for_increments$ic[table_for_increments$L1 == comparisons[n]],0)) stop("Error in mean increments") + + inc <- apply(inc, 2, quantile,c(0.025, 0.975)) + table_for_increments$ic_lb[table_for_increments$L1 == comparisons[n]] <- inc["2.5%","cost"] + table_for_increments$ic_ub[table_for_increments$L1 == comparisons[n]] <- inc["97.5%","cost"] + table_for_increments$il_lb[table_for_increments$L1 == comparisons[n]] <- inc["2.5%","ly"] + table_for_increments$il_ub[table_for_increments$L1 == comparisons[n]] <- inc["97.5%","ly"] + table_for_increments$iq_lb[table_for_increments$L1 == comparisons[n]] <- inc["2.5%","qaly"] + table_for_increments$iq_ub[table_for_increments$L1 == comparisons[n]] <- inc["97.5%","qaly"] + } + totals <- merge(totals,table_for_increments) + + #some final tests for consistency + if (sum(totals$Cost_mean != totals$costs) + sum(totals$LY_mean != totals$ly) + + sum(totals$qaly_mean != totals$qalys)) stop("Inconsistency in incremental results") + + totals <- totals[,c("L1", "trt", "Cost_mean", "Cost_2.5", "Cost_97.5", + "LY_mean", "LY_2.5", "LY_97.5", + "qaly_mean", "qaly_2.5", "qaly_97.5", + "ic","ic_lb","ic_ub", + "il", "il_lb", "il_ub", + "iq", "iq_lb", "iq_ub", "ICER")] + totals <- totals[order(totals$Cost_mean),] + + compact <- totals + compact[,c("Cost_mean", "Cost_2.5", "Cost_97.5", + "ic","ic_lb","ic_ub", + "ICER")] <- round(compact[,c("Cost_mean", "Cost_2.5", "Cost_97.5", + "ic","ic_lb","ic_ub", + "ICER")],0) + compact[,c("LY_mean", "LY_2.5", "LY_97.5", + "qaly_mean", "qaly_2.5", "qaly_97.5", + "il", "il_lb", "il_ub", + "iq", "iq_lb", "iq_ub")] <- round(compact[,c("LY_mean", "LY_2.5", "LY_97.5", + "qaly_mean", "qaly_2.5", "qaly_97.5", + "il", "il_lb", "il_ub", + "iq", "iq_lb", "iq_ub")],3) + + compact$cost <- paste0(compact$Cost_mean," (", compact$Cost_2.5, ", ", compact$Cost_97.5, ")") + compact$LY <- paste0(compact$LY_mean," (", compact$LY_2.5, ", ", compact$LY_97.5, ")") + compact$qaly <- paste0(compact$qaly_mean," (", compact$qaly_2.5, ", ", compact$qaly_97.5, ")") + compact$ic <- paste0(compact$ic," (", compact$ic_lb, ", ", compact$ic_ub, ")") + compact$il <- paste0(compact$il," (", compact$il_lb, ", ", compact$il_ub, ")") + compact$iq <- paste0(compact$iq," (", compact$iq_lb, ", ", compact$iq_ub, ")") + + compact <- compact[,c(c("L1", "trt", "cost", "LY", "qaly", + "ic","il","iq", "ICER"))] + report_tables[[pricing]][[pop]][["tables"]] <- list(totals = totals, + compact = compact) + + } +} + +write.csv(report_tables$`List price`$pop1$tables$compact, "./4_output/table_list_pop1_.csv") +write.csv(report_tables$`List price`$pop2$tables$compact, "./4_output/table_list_pop2_.csv") +write.csv(report_tables$`List price`$pop3$tables$compact, "./4_output/table_list_pop3_.csv") +write.csv(report_tables$`PAS price`$pop1$tables$compact, "./4_output/table_PAS_pop1_.csv") +write.csv(report_tables$`PAS price`$pop2$tables$compact, "./4_output/table_PAS_pop2_.csv") +write.csv(report_tables$`PAS price`$pop3$tables$compact, "./4_output/table_PAS_pop3_.csv") + +x <- multi.ce(report_tables$`List price`$pop1$bcea$qaly) +ceac.plot(x) +x <- multi.ce(report_tables$`List price`$pop2$bcea$qaly) +ceac.plot(x) +x <- multi.ce(report_tables$`List price`$pop3$bcea$qaly) +ceac.plot(x) +x <- multi.ce(report_tables$`PAS price`$pop1$bcea$qaly) +ceac.plot(x) +x <- multi.ce(report_tables$`PAS price`$pop2$bcea$qaly) +ceac.plot(x) +x <- multi.ce(report_tables$`PAS price`$pop3$bcea$qaly) +ceac.plot(x) + +# plot(bcea_oo_pop1_LY) +# ceplane.plot(bcea_oo_pop1_LY, wtp = 20000) +# eib.plot(bcea_oo_pop1_LY) +# contour(bcea_oo_pop1_LY) +# +# bcea_oo_pop1_LY <- multi.ce(bcea_oo_pop1_LY) +# ceac.plot(bcea_oo_pop1_LY) +# eib.plot(bcea_oo_pop1_LY) \ No newline at end of file diff --git a/2_Scripts/standalone scripts/PSA deterministic lambda/testing lambda.R b/2_Scripts/standalone scripts/PSA deterministic lambda/testing lambda.R new file mode 100644 index 0000000..fa19cd6 --- /dev/null +++ b/2_Scripts/standalone scripts/PSA deterministic lambda/testing lambda.R @@ -0,0 +1,784 @@ +# this script uses a saved input set p and i to perform a controlled test +# of model comparability to the deterministic model extrapolated with "full tunnels" +# for all states. + +# preamble ---------------------------------------------------------------- + +# ~ Packages and functions -------------------------------------------------- + +#library(shiny, quiet = TRUE) +library(gtools, quiet = TRUE) +library(openxlsx, quiet = TRUE) +#library(flexsurv, quiet = TRUE) +library(tidyverse, quiet = TRUE) +library(data.table, quiet = TRUE) +#library(heemod, quiet = TRUE) +#library(logOfGamma, quiet = TRUE) +library(ggplot2, quiet = TRUE) +library(survminer, quiet = TRUE) +library(officer, quiet = TRUE) +library(officedown, quiet = TRUE) +library(magrittr, quiet = TRUE) +library(Hmisc, quiet = TRUE) +library(future.apply, quiet = TRUE) +#library(crosstable, quiet = TRUE) +#library(flextable, quiet = TRUE) +library(stringr, quiet = TRUE) +#library(BCEA, quiet = TRUE) +library(collapse, quiet = TRUE) +library(scales, quiet = TRUE) +library(Matrix, quiet = TRUE) +library(progressr) +library(pracma) + + +# Multi-core processing: +# +# Instructions. +# +# This model is highly RAM intensive. You need a lot of RAM on your computer +# to run this model due to the large amount of very large matrix multiplications +# (up to approximately 15,000 discrete health states in the model). Therefore, +# in order to efficiently run the model, it is a balancing act between RAM +# usage and CPU usage. +# +# Some rough guidance is: +# +# - If you have 8GB of RAM on your computer, you can run this model with 2 cores only +# but it may even be faster to run in series if you have other things open on your +# computer at the same time. Therefore, please set keep_free_cores to NA and run +# the model in series. This is because when the RAM on your computer runs out +# your computer will use the hard-disk instead which is extremely slow. +# - If you have 16GB of RAM on your computer, parallel should be a lot faster. +# On my laptop (I7 8th gen, 16GB RAM, running Linux for low RAM usage) I can +# run with 5 cores whilst using about 12GB of RAM running this model. +# - if you have 24GB or 32GB of RAM, you should be able to run the model with 8 +# and up to around 14 cores before running out of RAM whilst running the model. +# - if you are using a HPC, you should be able to run this model with many cores +# due to the typically large amount of RAM available per core in a HPC +# +# +keep_free_cores <- 3 +if (is.na(keep_free_cores)) { + plan(sequential) +} else { + plan(multisession(workers = max(availableCores()-keep_free_cores,1))) +} + +# Other generic settings for the progress bar and units for table widths +handlers("progress") +options(crosstable_units="cm") + +#### 2. Loading functions ########### + + +# This variable is used throughout the model to define whether to provide additional outputs useful for QC or not +# The model will take longer to run when this is set to TRUE +qc_mode <- FALSE + + + +# 2.1. Excel data extraction functions ----------------------------------------- + +#### These functions are used to extract parameters from the Excel input workbook for use in R +#### During Phase 2 a Shiny front-end will be added to the model which will allow an alternative mechanism to upload these types of inputs + +source(file.path("./3_Functions/excel/extract.R")) + +# 2.2. Treatment sequencing functions ---------------------------------------- + +#### Function: filter to active treatments and lines +##### Takes as an input the defined sequences, evaluation type and line to start the evaluation from +##### Other input is % receiving each subs therapy at each line dependent on previous treatments received +##### Reweights so that the % receiving each treatment sums to 100% within each arm / line being studied +##### Outputs a matrix that has the % receiving each possible combination + +source("./3_Functions/sequencing/sequences.R") + +# 2.3. Survival analysis functions --------------------------------------------- + +# Function: conduct survival analysis +##### by treatment, line, population and outcome fitted survival curves using Flexsurvreg (exp, Weibull, lognormal, loglog, Gompertz, gen gamma) +##### calculation of and adjustment for general population +##### adjustment for treatment effect waning + +source("./3_Functions/survival/Survival_functions.R") +source("./3_Functions/survival/other_cause_mortality.R") +source("./3_Functions/survival/treatment_effect_waning.R") +source("./3_Functions/misc/fpnma_fns.R") + +# 2.4 Misc functions ---------------------------------------------------------- + +### these functions enable smoother data cleaning and manipulation + +source("./3_Functions/misc/other.R") +source("./3_Functions/misc/shift_and_pad.R") +source("./3_Functions/misc/cleaning.R") + +# 2.4.1 Functions imposing list structures ----------------------------------- + +source("./3_Functions/misc/nesting.R") +source("./3_Functions/misc/discounting.R") +source("./3_Functions/misc/qdirichlet.R") +source("./3_Functions/misc/plotting.R") +source("./3_Functions/misc/structure.R") + + +# 2.5 Utility functions ------------------------------------------------------- + +source("./3_Functions/utility/age_related.R") +source("./3_Functions/costs_and_QALYs/utility_processing.R") + +# 2.6 AE functions -------------------------------------------------------- + +source("./3_Functions/adverse_events/AE_steps.R") + +# 2.7 Cost calculation functions -------------------------------------------- + +source("./3_Functions/costs_and_QALYs/cost_processing.R") + + +# 2.8 State transition modelling functions -------------------------------- + +source("./3_Functions/markov/markov.R") + +# 2.9 Patient flow functions ---------------------------------------------- + +source("./3_Functions/patient_flow/overarching.R") +source("./3_Functions/patient_flow/partitioned_survival.R") +source("./3_Functions/patient_flow/markov.R") +source("./3_Functions/patient_flow/drug_costs.R") +source("./3_Functions/patient_flow/hcru_costs.R") +source("./3_Functions/patient_flow/qalys.R") +source("./3_Functions/patient_flow/ae.R") + + + +# 2.10 Results processing functions --------------------------------------- + +source("./3_Functions/results/incremental_analysis.R") +source("./3_Functions/results/model_averaging.R") +source("./3_Functions/results/partitioned_survival.R") +source("./3_Functions/misc/severity_modifier.R") +source("./3_Functions/results/results_tables.R") + +# PSA RELATED FUNCTIONS --------------------------------------------------- + +source("./3_Functions/psa/psa functions.R") + + + +# Load in i and p --------------------------------------------------------- + +# Load a pre-saved i and p set so we know we're starting from the right place +i <- readRDS("./2_Scripts/standalone scripts/QC/i.rds") +p <- readRDS("./2_Scripts/standalone scripts/QC/p.rds") + +# Now we have all the inputs required, EXCEPT for lambdas. we can use the functions +# from + + + +# Replicate prob model but det -------------------------------------------- + +i$psa_psm <- f_PSA_drawFSParams( + surv_regs = i$surv$reg, + n_psa = 1, + return_rands = FALSE, + lookups = p$basic$lookup$ipd, + verbose = FALSE +) + +# Now that the structure is laid down, you can see that the entries containing +# something already have "id" in them, we can use this: +# get_elem(i$psa_psm,"id") + +i$psa_psm <- lapply(i$psa_psm, function(risk_pop) { + lapply(risk_pop, function(tr_line) { + lapply(tr_line, function(mol) { + lapply(mol, function(trial) { + lapply(trial, function(endpoint) { + if (is.null(endpoint)) { + return(NULL) + } else { + id <- endpoint$id + fits <- f_misc_get_plmte(i$surv$reg,id)$fs_fits + draws <- lapply(fits, function(x) x$coefs) + namdist <- names(fits) + names(namdist) <- namdist + draws <- lapply(namdist, function(dis) { + if(dis=="exp") { + as.matrix(draws[[dis]]) + } else { + t(as.matrix(draws[[dis]])) + } + }) + return(list( + id = id, + draws = draws + )) + } + }) + }) + }) + }) +}) + +i$psa_psm_filtered <- f_psa_surv_params_filter( + psa_psm_param = i$psa_psm, + excel_eff_table = data.table(i$R_table_eff_data_settings), + lookups = p$basic$lookup +) + +i$psa_psm <- NULL + +# Generate lambdas for all +i$PSA_est_Lambdas <- f_psa_approx_lambda( + psa_params = i$psa_psm_filtered, + method = "sum", + th = p$basic$th +) + + +i_psa <- list( + ps = 1, + releff = list( + coda = list( + ph = i$PHNMA$data, + fp = i$FPNMA$data + ), + table = p$releff$excel_table + ), + surv = list( + lambda = i$PSA_est_Lambdas, + ref_curves = f_psa_lambda2St(i$PSA_est_Lambdas,0:p$basic$th), + rc_params = i$psa_psm_filtered + ), + cost = list(), + hrql = list(), + ae = list() +) + +# Make blank network - we dont' need to mess about with PSA style HR generation +# in this table as we're using the determinstic values: +i_psa$releff$blank_network <- f_NMA_generateNetwork(p$basic$id$ipd, p$basic$lookup$ipd) + + +p_psa <- list( + demo = p$demo$live, + util = f_process_utilities( + raw_utilities = i$R_table_util, + PSA = FALSE + ), + util_gpop_coefs = { + .p <- add_population_utility_params(list(), psa = FALSE, .i = i) + .p$util$pop_norms + }, + costs = f_psa_lambda_cost(f_process_cost_data( + drug_and_admin = i$R_table_drug_admin_costs, + per_cycle_costs = i$R_table_MRU, + time_horizon = p$basic$th, + max_trt_lines = p$basic$R_maxlines, + RDI_source = i$dd_sc_RDI, + verbose = FALSE, + samples = p$basic$npsa, + PSA = FALSE)), + releff = list() +) + +base_utility <- data.frame(cycle = 0:p$basic$th, utility = 1) +if (i$dd_ageadjuutilities == "Yes") { + if (i$dd_age_sex_source == "Mean") { + # We find the row corresponding to line 1 for each relevant population + + # Do a lot of wrangling to get in the format we want... + ptc_L1 <- i$R_table_ptchar[Treatment.line == 1, c(1, 3, 4)] + colnames(ptc_L1) <- c("Population", "age", "sex") + ptc_L1$sex <- 1 - ptc_L1$sex + ptc_L1 <- merge(ptc_L1, i$lookup$ipd$pop, by.x = "Population", by.y = "Description") + ptc_L1 <- ptc_L1[order(ptc_L1$Number), c("age", "sex", "Number")] + ptc_L1 <- split(ptc_L1[, c("age", "sex")], paste0("pop_", ptc_L1$Number)) + + p_psa$util_gpop <- lapply( + X = 1, + FUN = function(nested_psa_iteration) lapply(ptc_L1, function(pop) adjust_utility( + age = pop$age, + sex = pop$sex, + utilities = base_utility, + .patient_level = FALSE, + .p = + list( + basic = list(cl_y = p$basic$cl_y), + util = list(pop_norms = p_psa$util_gpop_coefs) + ) + )) + ) + + } else { + # We will only include IPD from line 1, since the population + # norm is applied according to absolute model time rather than + # than time in state. We don't know which population they are + # in, so we will replicate for pop_0, pop_1 and pop_2. + ipd_L1 <- i$R_table_patientagesex$Line == 1 + p_psa$util_gpop <- lapply( + X = 1, + FUN = function(nested_psa_iteration) { + pop_0 <- adjust_utility( + age = i$R_table_patientagesex$Age[ipd_L1], + sex = if_else(i$R_table_patientagesex$Gender[ipd_L1] == "M", "male", "female"), + utilities = base_utility, + .patient_level = TRUE, + .p = + list( + basic = list(cl_y = p$basic$cl_y), + util = list(pop_norms = p_psa$util_gpop_coefs) + ) + ) + list( + pop_0 = pop_0, + pop_1 = pop_0, + pop_2 = pop_0 + ) + } + ) + } +} else { + p_psa$util_gpop <- lapply(1:p$basic$npsa, function(nested_psa_iteration) list(pop_0 = 1, pop_1 = 1, pop_2 = 1)) +} + + + + + + + +# Run model deterministic ------------------------------------------------- + + +# PHNMA use means +phnma_coda_run <- p$releff$CODA$PH + +# FPNMA use means +p_psa$releff$CODA$FP <- p$releff$CODA$FP + +# link releff +network <- f_NMA_linkPHNMA( + network = i_psa$releff$blank_network, + hr_table = phnma_coda_run +) +network <- f_NMA_linkFPNMA( + network = network, + destinations = p$releff$fp_dest, + hr_table = p$releff$CODA$FP, + time_horizon = p$basic$th +) + +network <- f_NMA_AddAssumptionsToNetwork( + network = network, + phnma_table = phnma_coda_run, + fpnma_table = p$releff$CODA$FP, + fpnma_destinations = p$releff$fp_dest, + excel_table = i_psa$releff$table, + trial_flag = i$List_eff_datasources[1], + fpnma_flag = i$List_eff_datasources[3], + phnma_flag = i$List_eff_datasources[2], + et_flag = i$List_eff_datasources[4], + ahr_flag = i$List_eff_datasources[5], + verbose = FALSE, + psa_flag = TRUE +) + +st <- f_releff_PropNetwork( + network = network, + extraps = i_psa$surv$lambda, + dos = 10, + verbose = FALSE, + dist_lookups = p$basic$lookup$dist, + excel_table = i_psa$releff$table, + psa_lambda_flag = TRUE, + psa_iteration = 1, + psa_params = i_psa$surv$rc_params, + th = p$basic$th +) + + +# Adjust curves: +if (sum(i$R_table_TE_waning_settings$apply.waning == "Yes") > 0) { + st <- f_surv_twaning_apply( + st_list = st, + tab_waning = data.table(i$R_table_TE_waning_settings), + tab_eff_set = data.table(i$R_table_eff_data_settings), + verbose = FALSE + ) +} +st <- f_surv_gpopadjust( + st = st, + gpop = p$surv$gpop, + method = "hazardmax", + verbose = FALSE +) +st <- f_surv_PFSxOS(st = st, method = if(i$dd_adj_cross_curves == "Use hazards"){"hazardmax"} else{"abs"}) +st <- f_surv_TTDxOS(st, if(i$dd_adj_cross_curves == "Use hazards"){"hazardmax"} else{"abs"}) +st <- f_surv_PFSxTTP(st = st,method = "abs") +st <- lapply(st, function(popu) { + popu$line_5 <- popu$line_4 + return(popu) +}) + + +# Make transition probs for lambda approach: +tp <- f_psa_collapse_st_lambda2lplus(st = st, th = p$basic$th, disc = FALSE) + +# top-line inputs: +struct = p$basic$structure +verbose = FALSE +plots = FALSE +just_pop = p$basic$pops_to_run +just_nlines = NULL +just_seq = NULL + +if (!is.null(just_pop)) { + if(0 %in% just_pop) stop ("this is overall population, not risk population, it can't be 0.") + overall_pops <- structure(paste0("pop_",just_pop),.Names=paste0("pop_",just_pop)) +} else { + overall_pops <- structure( + paste0("pop_",p$basic$lookup$pop_map$Overall.population.number), + .Names=paste0("pop_",p$basic$lookup$pop_map$Overall.population.number) + ) +} +rpop <- paste0("pop_",p$basic$lookup$pop_map[match(as.numeric(gsub("pop_","",overall_pops)),p$basic$lookup$pop_map$Overall.population.number),]$Risk.population.number) + + +PATIENT_FLOW <- f_psa_pf_computePF_mkLambda( + pops = overall_pops, + basic = p$basic, + demo = p_psa$demo, + sequences = p$seq, + survival = list(gpop = p$surv$gpop, tp = tp), + costs = list(per_cycle = p_psa$costs, one_off = p$costs$oneoff_mk), + util = list(hsuv = p$util$mk, gpop = p_psa$util_gpop[[1]]), + ae = list(one_off = p$ae$duration, per_cycle = p$ae$mk$per_cycle, approach = p$ae$approach), + eff_table = p$releff$excel_table, + verbose = TRUE, + include_plots = FALSE, + just_nlines = NULL, + just_seq = NULL +) + +res_undisc <- f_pf_mk_summary( + pf_list = PATIENT_FLOW, + disc_undisc = "undisc", + lookups = p$basic$lookup, + full_breakdown = TRUE, + breakdown = TRUE, + ypc = p$basic$cl_y +) +res_disc <- f_pf_mk_summary( + pf_list = PATIENT_FLOW, + disc_undisc = "disc", + lookups = p$basic$lookup, + full_breakdown = TRUE, + breakdown = TRUE, + ypc = p$basic$cl_y +) + +# Cost per drug per sequence per population + +# Target - do this for all pathways! +empty_cost_mol_list <- paste0("mol_",p$basic$lookup$trt) +empty_cost_mol_list <- structure(numeric(length(empty_cost_mol_list)),.Names=empty_cost_mol_list) + +# undiscounted and discounted results by treatment pathway: +results_undsic <- lapply(res_undisc, function(popu) { + fbd <- popu$full_breakdowns + + # Get the drug costs per molecule per sequence: + trt_list_L1 <- unlist(lapply(fbd, function(trt_sq) trt_sq$numb[1])) + trt_list <- lapply(fbd, function(trt_sq) paste0("mol_",trt_sq$numb)) + cost_list <- lapply(fbd, function(trt_sq) trt_sq$cost[,"drug"]) + drug_cost <- data.table(do.call( + rbind, + lapply(structure(1:length(cost_list),.Names=names(trt_list)), function(trt_sq) { + cl <- cost_list[[trt_sq]] + names(cl) <- trt_list[[trt_sq]] + empty_cost_mol_list[names(cl)] <- cl + return(empty_cost_mol_list) + }) + )) + + # now we need other costs, which are nicely in breakdowns + bdt <- cbind(popu$breakdowns,drug_cost) + bdt$drug <- NULL + bdt$L1 <- trt_list_L1 + + bdt[,`:=`(other_costs = admin + mru_on + mru_off + ae_cost + eol, qaly = qaly + ae_qaly)] + bdt[,`:=`(admin = NULL, mru_on = NULL, mru_off = NULL, ae_cost = NULL, eol = NULL, ae_qaly = NULL)] + + # Now we just need life years and weightings: + lybd <- popu$ly$breakdown[,.(LY = sum(L1_on,L1_off,BSC,L2_on,L2_off,L3_on,L3_off,L4_on,L4_off,na.rm = TRUE)),by="trt"] + + model_breakdown <- merge.data.table(bdt,lybd) + rm(bdt) + rm(lybd) + + # Now we have all the results we need for this PSA iteration consolidated + # together in one table. However, it's too granular and we haven't + # merged in the weightings yet. + model_breakdown + +}) +results_disc <- lapply(res_disc, function(popu) { + fbd <- popu$full_breakdowns + + # Get the drug costs per molecule per sequence: + trt_list_L1 <- unlist(lapply(fbd, function(trt_sq) trt_sq$numb[1])) + trt_list <- lapply(fbd, function(trt_sq) paste0("mol_",trt_sq$numb)) + cost_list <- lapply(fbd, function(trt_sq) trt_sq$cost[,"drug"]) + drug_cost <- data.table(do.call( + rbind, + lapply(structure(1:length(cost_list),.Names=names(trt_list)), function(trt_sq) { + cl <- cost_list[[trt_sq]] + names(cl) <- trt_list[[trt_sq]] + empty_cost_mol_list[names(cl)] <- cl + return(empty_cost_mol_list) + }) + )) + + # now we need other costs, which are nicely in breakdowns + bdt <- cbind(popu$breakdowns,drug_cost) + bdt$drug <- NULL + bdt$L1 <- trt_list_L1 + + bdt[,`:=`(other_costs = admin + mru_on + mru_off + ae_cost + eol, qaly = qaly + ae_qaly)] + bdt[,`:=`(admin = NULL, mru_on = NULL, mru_off = NULL, ae_cost = NULL, eol = NULL, ae_qaly = NULL)] + + # Now we just need life years and weightings: + + model_breakdown <- bdt + rm(bdt) + + # Now we have all the results we need for this PSA iteration consolidated + # together in one table. However, it's too granular and we haven't + # merged in the weightings yet. + model_breakdown + +}) + +# Return a list for this PSA iteration containing lifetime outcome table +# which collapses to the following columns +# +# Run | L1 trt | overall population | dcost mol 1, 2, 3,... | other costs | QALYs | LYs +# +# +# + +pop_lab <- names(results_disc) +names(pop_lab) <- pop_lab +# +# Return +deterministic_lambda_results <- rbindlist(lapply(pop_lab, function(pop_name) { + tab_with_ly <- merge.data.table(results_disc[[pop_name]],results_undsic[[pop_name]][,list(trt_n,LY)]) + tab_with_ly$oo_pop <- as.numeric(gsub("pop_","",pop_name)) + tab_with_ly +})) + +deterministic_lambda_results$dd_drug_price_options <- rep(i$dd_drug_price_options, nrow(deterministic_lambda_results)) +deterministic_lambda_results$iteration <- rep(1, nrow(deterministic_lambda_results)) + +saveRDS(deterministic_lambda_results, paste0("./4_Output/lambda_det_output_",gsub(":","_",date()),".rds")) + +# Weight results ------------------------------------------------- + +wa_model <- f_psa_computeWAModelRes( + R_table_sub_txts_prop_n_costs = i$R_table_sub_txts_prop_n_costs, + sims = 1, + lookups = p$basic$lookup, + psa_results = deterministic_lambda_results, + PSA = FALSE +) + +# Report results ------------------------------------------------- + +lu_mol = p$basic$lookup$ipd$mol + +results <- wa_model$weighted_average +results$cost <- apply(results[, c("mol_0", "mol_1", "mol_2", "mol_3","mol_4", "mol_5", + "mol_6", "mol_7", "mol_8", "mol_9", "mol_10", + "mol_11", "mol_12", "mol_999", "other_costs")],1, sum) + +results$trt <- NA + +results$trt[results$L1 == 1] <- "Cabozantinib plus nivolumab" +results$trt[results$L1 == 2] <- "Nivolumab plus ipilimumab" +results$trt[results$L1 == 3] <- "Lenvatinib plus pembrolizumab" +results$trt[results$L1 == 5] <- "Pazopanib" +results$trt[results$L1 == 6] <- "Tivozanib" +results$trt[results$L1 == 7] <- "Sunitinib" +results$trt[results$L1 == 8] <- "Cabozantinib" + + +results <- results[,c("dd_drug_price_options", "oo_pop", "L1", "trt", "iteration", "cost", "LY", "qaly")] + +#set up tables list +report_tables <- list() + +# read in formatted data +for (pricing in unique(results$dd_drug_price_options)) { + for (pop in unique(results$oo_pop)) { + results2 <- results[(results$dd_drug_price_option == pricing & + results$oo_pop == pop),] + + cost <- results2[,c(3,5,6)] + cost <- spread(cost, key = "L1", value = "cost") + cost <- cost[,-1] + + LY <- results2[,c(3,5,7)] + LY <- spread(LY, key = "L1", value = "LY") + LY <- LY[,-1] + + qaly <- results2[,c(3,5,8)] + qaly <- spread(qaly, key = "L1", value = "qaly") + qaly <- qaly[,-1] + + data <- list(raw = results2, + cost = cost, + LY = LY, + qaly = qaly) + rm(cost, LY, qaly) + + report_tables[[pricing]][[pop]] <- list(data = data) + names(report_tables[[pricing]])[pop] <- paste0("pop",pop) + } +} + +#create tables +for (pricing in unique(names(report_tables))) { + #pricing <- unique(names(report_tables))[1] + for (pop in unique(names(report_tables[[pricing]]))) { + #pop <- unique(names(report_tables[[pricing]]))[1] + + data <- report_tables[[pricing]][[pop]][["data"]] + totals <- t(rbind(t(apply(data$cost,2,mean)), + t(apply(data$LY,2,mean)), + t(apply(data$qaly,2,mean)))) + totals <- cbind(as.numeric(rownames(totals)), totals) + + colnames(totals) <- c("L1","costs", + "ly", + "qalys") + + table_for_increments <- as.data.table(totals) + + incremental <- f_res_mk_incremental(res_d = table_for_increments, + res_ud = table_for_increments, + lu_mol = lu_mol, + produce_plot = FALSE, + no_active_lines = 4, + output_weighted = "Yes")$non_dominated + + colnames(incremental)[colnames(incremental) == "L1"] <- "trt" + incremental$L1 <- lu_mol[match(incremental$trt, lu_mol$Description)]$Number + table_for_increments$trt <- lu_mol[match(table_for_increments$L1, lu_mol$Number)]$Description + table_for_increments <- merge(table_for_increments, incremental, all.x = TRUE) + table_for_increments <- table_for_increments[order(table_for_increments$costs),] + table_for_increments <- table_for_increments[,c("L1","trt", "costs", "qalys", "ly", "ic", "iq", "il", "ICER")] + + comparisons <- table_for_increments$L1[!is.na(table_for_increments$ic)] + + if (length(comparisons)<2) stop("Less than one valid incremental pair") + for (n in 2:length(comparisons)) { + inc <- data.frame(cost = data$cost[[which(comparisons[n] == names(data$cost))]] - + data$cost[[which(comparisons[n-1] == names(data$cost))]], + + ly = data$LY[[which(comparisons[n] == names(data$LY))]] - + data$LY[[which(comparisons[n-1] == names(data$LY))]], + + qaly = data$qaly[[which(comparisons[n] == names(data$qaly))]] - + data$qaly[[which(comparisons[n-1] == names(data$qaly))]]) + + if (round(mean(inc$cost),0) != round(table_for_increments$ic[table_for_increments$L1 == comparisons[n]],0)) stop("Error in mean increments") + + + } + totals <- merge(totals,table_for_increments) + + totals <- totals[,c("trt", "costs", + "ly", + "qalys", + "ic", + "il", + "iq", "ICER")] + totals <- totals[order(totals$costs),] + + report_tables[[pricing]][[pop]][["tables"]] <- list(totals = totals) + + } +} + + +pricing <- i$dd_drug_price_options + +lu_pop <- p$basic$lookup$pop_map + +ft_basic_bop <- do.call(rbind, lapply(structure( + names(report_tables[[pricing]]), .Names = names(report_tables[[pricing]]) +), function(popu_txt) { + popu <- report_tables[[pricing]][[popu_txt]]$tables$totals + popu_n <- as.numeric(gsub("pop", "", popu_txt)) + + # seq_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Sequencing.population + rsk_popu_lab <- + lu_pop[Overall.population.number == popu_n,]$Risk.population + + # popu$seq_pop <- seq_popu_lab + popu$risk_pop <- rsk_popu_lab + + + return(popu) + +})) + +setDT(ft_basic_bop)[, risk_pop := str_replace(risk_pop, "Int/poor", "Intermediate / poor risk")] + +Word_width_inches = 29.7*0.3937 + +ft_det_lamda_output <- ft_basic_bop %>% + rename(`Risk population` = risk_pop) %>% + as_grouped_data(groups = "Risk population") %>% + as_flextable() %>% + width(., width = (Word_width_inches / (ncol(ft_basic_bop)))) %>% + theme_box() |> + set_header_labels( + values = list( + trt = "Technologies", + costs = "Costs (£)", + ly = "LYG", + qalys = "QALYs", + ic = "Inc. Costs", + il = "Inc. LYG", + iq = "Inc. QALYs", + ICER = "ICER incremental" + ) + ) %>% + flextable::colformat_double(j = c(2, 5, 8), + digits = 0, + prefix = "£") %>% + flextable::colformat_double(j = c(3, 4, 6, 7), digits = 2) %>% + add_footer_lines( + "Abbreviations: ICER, incremental cost-effectiveness ratio; LYG, life-years gained; QALY, quality-adjusted life-year" + ) %>% + # add_header_row(colwidths = c(1,1, 2),values = c("","g1", "g2")) |> + bold(bold = TRUE, part = "header") %>% + fontsize(i = NULL, + size = 10, + part = c("header")) %>% + fontsize(i = NULL, + size = 10, + part = c("body")) %>% + fontsize(i = NULL, + size = 9, + part = c("footer")) %>% + align(i = ~ !is.na(`Risk population`), align = "left") %>% + bold(i = ~ !is.na(`Risk population`)) + +ft_det_lamda_output diff --git a/2_Scripts/standalone scripts/QC/Readme b/2_Scripts/standalone scripts/QC/Readme new file mode 100644 index 0000000..97d5f8b --- /dev/null +++ b/2_Scripts/standalone scripts/QC/Readme @@ -0,0 +1 @@ +This folder is where p and i are saved as QC outputs \ No newline at end of file diff --git a/2_Scripts/standalone scripts/fpnma cutup/FPNMA_cutup.R b/2_Scripts/standalone scripts/fpnma cutup/FPNMA_cutup.R new file mode 100644 index 0000000..31245fd --- /dev/null +++ b/2_Scripts/standalone scripts/fpnma cutup/FPNMA_cutup.R @@ -0,0 +1,60 @@ +# code to aggregate FPNMA means +library(data.table) + +folder_to_FPNMA_codas <- rstudioapi::selectDirectory() +filelist <- list.files(folder_to_FPNMA_codas) +filelist <- filelist[grepl(".rds",filelist)] + + +# restructure the coda into complete sets of 1000 runs for all PLMTE (so 16 files for 16,000 runs) +FPNMA <- readRDS(paste0(folder_to_FPNMA_codas,"/",filelist[1])) +runs <- max(FPNMA$run) + +for (i in seq(1, runs, by = 1000)) { + cat(i, "of", runs,"\n") + run <- FPNMA[FPNMA$run >= i & FPNMA$run < (i+1000),] + saveRDS(run, file = paste0(folder_to_FPNMA_codas,"/by_run/FPNMA_",i,"_",i+1000,".rds")) +} + +rm(FPNMA) +rm(run) + +for (j in 2:length(filelist)) { + cat("Processing file", filelist[j]) + FPNMA <- readRDS(paste0(folder_to_FPNMA_codas,"/",filelist[j])) + runs <- max(FPNMA$run) + + for (i in seq(1, runs, by = 1000)) { + cat(i, "of", runs,"\n") + run <- readRDS(paste0(folder_to_FPNMA_codas,"/by_run/FPNMA_",i,"_",i+1000,".rds")) + run2 <- FPNMA[FPNMA$run >= i & FPNMA$run < (i+1000),] + run <- rbind(run, run2) + rm(run2) + saveRDS(run, file = paste0(folder_to_FPNMA_codas,"/by_run/FPNMA_",i,"_",i+1000,".rds")) + } + + rm(FPNMA) + rm(run) +} + +#calculate means from 16,000 runs +filelist <- list.files(paste0(folder_to_FPNMA_codas,"/by_run")) +filelist <- filelist[grepl(".rds",filelist)] + +means <- list() +for (i in 1:length(filelist)) { + cat(filelist[i],"\n") + FPNMA <- readRDS(paste0(folder_to_FPNMA_codas,"/by_run/",filelist[i])) + FPNMA <- as.data.table(FPNMA) + means[[i]] <- FPNMA[,mean(HR), by = .(time, intervention_code, reference_treatment_code, line, endpoint, population, ref_trial_code)] +} + +means2 <- NULL +for (i in 1:length(means)) { + means2 <- rbind(means2,means[[i]]) +} +colnames(means2)[8] <- "HR" + +means2 <- means2[,mean(HR), by = .(time, intervention_code, reference_treatment_code, line, endpoint, population, ref_trial_code)] + +saveRDS(means2, file = paste0(folder_to_FPNMA_codas,"/FPNMA_means.rds")) diff --git a/2_Scripts/standalone scripts/markov modelling/reduce_tunnel.R b/2_Scripts/standalone scripts/markov modelling/reduce_tunnel.R new file mode 100644 index 0000000..6b916b6 --- /dev/null +++ b/2_Scripts/standalone scripts/markov modelling/reduce_tunnel.R @@ -0,0 +1,2344 @@ + +# Stand-alone script which was used during the model development stages. Read +# at your leisure. This shows some of the progression towards using sparse matrix +# block diagonal multiplications to compute the markov model. + + +# Libraries --------------------------------------------------------------- + +library(gtools) # for dirichlet +library(data.table) +library(tidyverse) +library(collapse) +library(bigalgebra) + +# Example data ------------------------------------------------------------ + +tp_top <- rdirichlet(99,c(95,2,1)) +tp_mid <- rdirichlet(99,c(0,93,7)) +tp_bottom <- c(0,0,1) + +# List of TPMs with some random variation cycle to cycle +tp <- lapply(1:dim(tp_top)[1], function(cycle) { + matrix( + c( + tp_top[cycle,], + tp_mid[cycle,], + tp_bottom + ), + nrow = 3, + byrow = TRUE + ) +}) + + +# Reduce through TPMs + +prev_cycle <- c(1,0,0) +tun_length <- 2 +tun_death <- c(0,0.15,0.1) + +mk_top_row <- matrix(c(1,0,0),nrow=1) +tun_top_row <- matrix(c(0,0,0),nrow=1) + + + +# Attempts at cracking it ------------------------------------------------- + + + +# ~ What a tunnel state does ---------------------------------------------- + +# A tunnel state pulls patients out of the state residency into a special state +# with a fixed duration. This state can only be exited via death or making it to the end. +# +# In a Markov model, this means that simply repeatedly applying matrix multiplication +# is incorrect, since transition probabilities are proportional, so you can't work out +# the trace with a tunnel without building the tunnel into the multiplication steps. + +# Let's go ultra simple and try to work through a tunnel state that interrupts 1->2 for +# THREE CYCLES, and kills off 10% each cycle in there, then feeding into state 2 afterwards: + +tpm <- tp[[1]] + +p <- c(1,0,0) + +tun_dur <- 3 +tun_qx <- c(0.1,0.1,0.15) + +# Ignoring the tunnel +tr_noTun <- Reduce( + x = 1:100, + init = p, + accumulate = TRUE, + f = function(previous_result, cycle_number) { + previous_result %*% tpm + } +) + +tr_noTun <- do.call(rbind,tr_noTun) + +head(tr_noTun) + +# Including the tunnel but fixing the duration at 3 cycles: + +TH <- 100 + +p <- matrix(nrow = TH, + ncol = 3 + 3) # Note the extra state for "inside the tunnel" allowing each row in p to sum to 1 is not here!!! +p[1,] <- c(1,rep(0,3 + 3 - 1)) # Note the extra state for "inside the tunnel" allowing each row in p to sum to 1 is not here!!! + +p[is.na(p)] <- 0 + +colnames(p) <- c("1L", "tun1", "tun2", "tun3", "2L", "dead") + + +# Note that tun_trace lags behind p by 1 cycle. To adjust for that, the code starts +# in the 2nd row + +# OS within the tunnel - if tunnel qx is time-varying simply replace with S(t) vector. +# done that here as an example. +tun_os <- cumprod(c(1,(1-tun_qx))) + +# cumulative mort in the tunnel: +tun_mort <- 1-tun_os + +# so for each batch of patients that go into the tunnel, we have an overall survival +# and cumulative mortality line (just multiply the above by entry pop). +# +# With that we can track populations inside the tunnel, and calculate those cohorts +# all at once + + +# Next, we can't just do the above Reduce approach of iteratively multiplying the +# population in cycle-1 by the tpm to get the new population, since there are patients +# getting taken out of it. Instead, what we could do is expand the TPM: +tpm <- as.data.table(tpm) + +tpm$V4 <- 0 +tpm$V5 <- tpm[,V2] +tpm$V6 <- tpm[,V3] +tpm$V2 <- 0 +tpm$V3 <- 0 + +# Set up the tunnel +tpm[1,"V2"] <- tpm[1,"V5"] +tpm[1,"V5"] <- 0 + +tpm2 <- do.call(rbind,lapply(1:nrow(tpm), function(rw) { + if (rw == 1 | rw == 3) { + return(as.numeric(tpm[rw,])) + } else if (rw == 2) { + do.call(rbind,list( + rep(0,ncol(tpm)), + rep(0,ncol(tpm)), + rep(0,ncol(tpm)), + as.numeric(tpm[rw,]) + )) + } +})) + + +dimnames(tpm2) <- list(c("1L", "tun1", "tun2", "tun3", "2L", "dead"),c("1L", "tun1", "tun2", "tun3", "2L", "dead")) + +tpm2["tun1","dead"] <- tun_qx[1] +tpm2["tun1","tun2"] <- 1-tun_qx[1] +tpm2["tun2","dead"] <- tun_qx[2] +tpm2["tun2","tun3"] <- 1-tun_qx[2] +tpm2["tun3","dead"] <- tun_qx[3] +tpm2["tun3","2L"] <- 1-tun_qx[3] + +# Now that we have expanded the transition probability matrix to incorporate the +# tunnel state directly, it should be the case that we can now simply multiply it out! + +trace_tun <- matrix( + unlist(Reduce( + x = 2:TH, + init = p[1,], + accumulate = TRUE, + f = function(prev_pop, cyc) { + prev_pop %*% tpm2 + } + )), + nrow = TH, + byrow = TRUE, + dimnames = list(NULL,c("1L", "tun1", "tun2", "tun3", "2L", "dead")) +) + +# Success! this is a 3 cycle tunnel state. Let's write it into a function where we can defined a FIXED tunnel duration. + + +# Making fixed tunnel into a function ------------------------------------- + +TH <- 100 + +state_names <- c(paste0("line_",1:5),"bsc","dead") + +bl_pop <- c(1,rep(0,length(state_names)-1)) +names(bl_pop) <- state_names + +tp_1L <- rdirichlet(TH,c(95,2 ,0 ,0 ,0 ,3 ,4)) +tp_2L <- rdirichlet(TH,c(0 ,93,6 ,0 ,0 ,3 ,5)) +tp_3L <- rdirichlet(TH,c(0 ,0 ,92,5 ,0 ,3 ,6)) +tp_4L <- rdirichlet(TH,c(0 ,0 ,0 ,91,3 ,3 ,7)) +tp_5L <- rdirichlet(TH,c(0 ,0 ,0 ,0 ,90,3 ,8)) +tp_BSC <- rdirichlet(TH,c(0 ,0 ,0 ,0 ,0 ,85,9)) +tp_dead <- c(0, 0, 0, 0, 0, 0, 1) + +# List of TPMs with some random variation cycle to cycle to simulate time-varying +# transition probabilities between states. Patients can't go backwards in line, and +# can't skip lines except for going straight to bsc (i.e. choosing no treatment/palliative care) +tp <- lapply(1:TH, function(cycle) { + matrix( + c(tp_1L[cycle,], + tp_2L[cycle,], + tp_3L[cycle,], + tp_4L[cycle,], + tp_5L[cycle,], + tp_BSC[cycle,], + tp_dead + ), + nrow = 7, + byrow = TRUE, + dimnames = list(state_names,state_names) + ) +}) + + +# Other parameters for our function: + +tun_pos <- 1 +tun_len <- 3 +tun_mort <- c(0.1,0.08,0.15) +tpm <- tp[[1]] + +# Now for the function! we're going to make it possible to expoand the transition probability +# matrix at any position and insert a tunnel state there. This function will +# expand ONE TPM. you can then use lapply to apply it to all of them in a list +# or apply to time invariant as you wish! + +f_mk_FixedTunnelMarkovExpand <- function(tpm, tun_pos, tun_len, tun_mort) { + + # always start with validation: + # Determine whether tun_mort is a single value or a vector: + if (!length(tun_mort) %in% c(1,tun_len)) stop("tunnel mort must be either 1 number, or a vector of the same length as tun_len!") + if (tun_len == 1) stop("tun_len should be at least 2") + + # Establish the entrance to the tunnel. For example if tun_pos is 1 then + # it's the transition tpm[1,2], if it's 3 then it's tpm[3,4]. Move that TP from its + # place in aft to a separate object, and then set it to 0 in aft. + + pr_enter_tun <- tpm[tun_pos,tun_pos+1] + tpm[tun_pos,tun_pos+1] <- 0 + + # Let's make a way to appropriately name this tunnel (i.e. tun_ prefix and numeric positoins) + + if (!is.null(dimnames(tpm)[[1]])) { + tunnel_start_state <- dimnames(tpm)[[1]][tun_pos] + tun_lab <- paste0("tun_",tunnel_start_state,"_",1:tun_len) + } else { + tun_lab <- paste0("tun_",tun_pos,"_",tun_pos+1,"_",1:tun_len) + } + + # So now we need to slice up the matrix and add some zeros. We're going to + # insert a tunnel state to tpm that STARTS WHEN EXITING tun_pos + # state (which allows you to enter the number 1). + # This tunnel is tun_len cycles long and has tun_mort associated + # mortality whilst inside the tunnel + # + # This means that we should first add the zero rows in the right place, then + # the zero columns (it's less faff that way) + + bef <- tpm[1:tun_pos,,drop=FALSE] + aft <- tpm[(tun_pos+1):dim(tpm)[1],,drop=FALSE] + + empty_row <- rep(0,dim(tpm)[2]) + names(empty_row) <- dimnames(bef)[[2]] + empty_rows <- do.call(rbind,lapply(1:tun_len, function(tun_cyc) empty_row)) + rownames(empty_rows) <- tun_lab + + tpm <- do.call(rbind,list( + bef, + empty_rows, + aft + )) + + # Now do the same thing for columns too: + bef <- tpm[,1:tun_pos,drop=FALSE] + aft <- tpm[,(tun_pos+1):dim(tpm)[2],drop=FALSE] + + empty_col <- rep(0,dim(tpm)[1]) + names(empty_col) <- dimnames(bef)[[1]] + empty_cols <- do.call(cbind,lapply(1:tun_len, function(tun_cyc) empty_col)) + colnames(empty_cols) <- tun_lab + + tpm <- do.call(cbind,list( + bef, + empty_cols, + aft + )) + + # Great! now we have a larger transition probability matrix which has a space + # for all of the possible transitions, including those for the tunnel state. + # From here, we can systematically populate the transition probabilities within + # the tunnel, using tun_mort, depending on whether it's a single number or + # vector of numbers + + # First off, put the transition probability we pulled out at the start of the + # tunnel: + tpm[tun_pos,tun_pos+1] <- pr_enter_tun + + if (length(tun_mort) == 1) { + tun_mort <- rep(tun_mort,tun_len) + } + + # Now cycle through the cycles in the tunnel, informing the correct cell in + # the tpm with the probability of staying in the tunnel or death, such that the + # final tpm returned from this function has rows that sum to 1 and a tunnel in it! + return( + Reduce( + x = 1:tun_len, + accumulate = FALSE, + init = tpm, + f = function(prev, index) { + # the index'th cycle within the tunnel - patient can die or transition into + # the next cycle of the tunnel (or out of the tunnel) + prev[tun_pos + index, ncol(prev)] <- tun_mort[index] + prev[tun_pos + index, tun_pos + index + 1] <- 1-tun_mort[index] + return(prev) + } + ) + ) + +} + + +# now let's test this function: +tpm2 <- f_mk_FixedTunnelMarkovExpand( + tpm = tpm, + tun_pos = 2, + tun_len = 2, + tun_mort = c(0.15,0.35) +) + + +bl_pop <- + c( + line_1 = 1, + line_2 = 0, + tun_line_2_1 = 0, + tun_line_2_2 = 0, + line_3 = 0, + line_4 = 0, + line_5 = 0, + bsc = 0, + dead = 0 + ) + +test_trace <- do.call( + rbind, + Reduce( + x = 1:(100-1), + init = bl_pop, + accumulate = TRUE, + f = function(prev, cycle) { + prev %*% tpm2 + } + ) +) + + +rowSums(test_trace) + +test_trace <- as.data.table(test_trace) +test_trace$t <- 0:(nrow(test_trace)-1) + + +test_trace <- melt(test_trace,id.vars = "t",variable.name = "state", value.name = "pop") + +ggplot(test_trace, aes(x = t, y = pop, colour = state)) + + geom_line() + + theme_classic() + + theme(legend.position = "bottom") + +# WOOP! all rows sum to 1 and it has a tunnel state in it which we can place anywhere +# and change the length of. + +# Problem is, the same cohort should consistently be exposed to those TPs, so this +# approach has the weakness that tun_mort can't be dependent on absolute time since +# baseline, just on time in tunnel given line before entering the tunnel. + + +# Now let's try adding another tunnel to this tunnel-state matrix! + +tpm3 <- f_mk_FixedTunnelMarkovExpand( + tpm = tpm2, + tun_pos = 5, + tun_len = 3, + tun_mort = c(0.1,0.2,0.15) +) +tpm4 <- f_mk_FixedTunnelMarkovExpand( + tpm = tpm3, + tun_pos = 9, + tun_len = 2, + tun_mort = c(0.05,0.1) +) +tpm5 <- f_mk_FixedTunnelMarkovExpand( + tpm = tpm4, + tun_pos = 12, + tun_len = 4, + tun_mort = c(0.05,0.09,0.1,0.15) +) + +# Now that we've got a fully fleshed out transition probability matrix: + +# this will create the vector of column names and print it to the console for your +# convenience. Instead we do it programatically: +# dput(colnames(tpm5)) + +st_nam <- colnames(tpm5) +bl_pop <- rep(0,length(st_nam)) +names(bl_pop) <- st_nam +bl_pop[1] <- 1 + +test_trace <- do.call( + rbind, + Reduce( + x = 1:(100-1), + init = bl_pop, + accumulate = TRUE, + f = function(prev, cycle) { + prev %*% tpm5 + } + ) +) + +# All rows still sum to 1 :) +rowSums(test_trace) + +test_trace <- as.data.table(test_trace) +test_trace$t <- 0:(nrow(test_trace)-1) +test_trace <- melt(test_trace,id.vars = "t",variable.name = "state", value.name = "pop") +ggplot(test_trace, aes(x = t, y = pop, colour = state)) + + geom_line() + + theme_classic() + + theme(legend.position = "bottom") + + +# As there are so many individual time points within tunnels, we might just want +# to see the full tunnel population for each tunnel. This is a bit fiddly to do +# programatically but worth it + + +test_trace <- do.call( + rbind, + Reduce( + x = 1:(100-1), + init = bl_pop, + accumulate = TRUE, + f = function(prev, cycle) { + prev %*% tpm5 + } + ) +) + +orig_nams <- colnames(tpm) +names(orig_nams) <- orig_nams + +full_nam <- colnames(tpm5) +tun_nams <- full_nam[grep("tun_",full_nam)] + +tun_pops <- lapply(orig_nams, function(orig_state) { + + red_nam <- full_nam[grep(orig_state,full_nam)] + + # If there's no tunnel state following this state just return a NULL value + if(length(red_nam) == 1) return(NULL) + + # If there is a tunnel, we now know the columns to add up to get our total + # tunnel state residency by time t + col_sel <- red_nam[which(red_nam != orig_state)] + + rowSums(test_trace[,col_sel]) + +}) + +# Reduce down to just the tunnel populations +tun_pops <- tun_pops[which(!sapply(tun_pops,is.null))] + +# rename them appropriately +names(tun_pops) <- paste0("tun_",names(tun_pops)) + +# turn them into a data.table +tun_pops <- as.data.table(tun_pops) + +# remove all tunnels breakdowns from the trace, and add the summed ones back in: +trace2 <- cbind(test_trace[,orig_nams],tun_pops) + +# All still sum to 1 in rows :) +rowSums(trace2) + + +# Now we can plot: +trace2 <- as.data.table(trace2) +trace2$t <- 0:(nrow(trace2)-1) +trace2 <- melt(trace2,id.vars = "t",variable.name = "state", value.name = "pop") +ggplot(trace2, aes(x = t, y = pop, colour = state)) + + geom_line() + + theme_classic() + + theme(legend.position = "bottom") + +# So, we can now include any number of tunnel states of any length to any TPM. +# We're getting somewhere now! + + +# the big one ------------------------------------------------------------- + +# ok now we want to do time varying transition probabilities with different exit +# time for tunnel per cycle, so that the mortality in the tunnel stays the same, but +# its length can move around from cycle to cycle + +vec_tun_len <- sample(1:4,TH,TRUE) +max_tun_len <- max(vec_tun_len) + +# To make it even more complicated, the tunnel mortality can vary over time IF +# mortality in the tunnel is dependent on both t and t_tun (i.e., time inside +# tunnel): + +# Just for an example I'm going to generate a bunch of beta draws, separated by cycle. +# The reason it's a list is because each element is a different length, so you can't +# just bind them together. +# +# Programming wise, it's just a case of l_tun_os[[cohort]][cohort_cycle,] instead of l_tun_os[cycle,] +# so it's trivial to adapt to it. in other words, for each cohort there's a totally +# different set of TPs to the death state whilst within the tunnel, given +# absolute time AND time since entering the tunnel (i.e., p(death | t, t_tun)). +# I cannot see why any further level of branching would be required beyond this, +# and in most CE modelling cases I can see probability of death being only a function +# of time in tunnel OR absolute time. In both these cases, one would simply adapt +# the below lapply to appropriately generate these death probability matrices. +# +# To clarify - here we're saying that mortality whilst within tunnel depends on +# both the cycle you went into the tunnel AND how long you've been in the tunnel given +# the time that you entered the tunnel. This accounts for "counting back" + +l_tun_os <- lapply(1:length(vec_tun_len), function(cyc) { + # Make a matrix which contains our (example) random numbers for probability of death + # in tunnel. obviously these will be different when we're using real data. Transpose + # it so we can split it by row afterwards. Speeds things up later so we may as well + # do it now. + + tun_len_this_cohort <- vec_tun_len[cyc] + + tp_i <- t(matrix( + data = rbeta((TH - cyc + 1) * tun_len_this_cohort,10,100), + nrow = TH - cyc + 1, + ncol = tun_len_this_cohort, + byrow = TRUE, + dimnames = list(NULL,NULL) + )) + tp_i <- split(tp_i, rep(1:ncol(tp_i), each = nrow(tp_i))) + names(tp_i) <- NULL + + # extend the values with 100% death probability to capture the impossible people + # who shouldn't be there anyway. This way will force rowsums to be + + if (max_tun_len - tun_len_this_cohort > 0) { + tun_ext_len <- rep(1,max_tun_len - tun_len_this_cohort) + return(lapply(tp_i, function(this_cycle_this_cohort) c(this_cycle_this_cohort, tun_ext_len))) + } else { + return(tp_i) + } +}) + +# We now refer to each death vector in a very simple way: l_tun_os[[cohort]][[model_cycle]] +# This gets us the p(death | t, t_tun) where this time t_tun is the time at which +# that cohort ENTERED the tunnel. +# +# This allows rule-based extrapolations (e.g. if there's an OS line for tunnel +# before and after 6 months from baseline) or relative rule-based extrapolations +# (e.g. if 6 months after entering the previous line get this OS, otherwise that one) +# allowing for really complicated interactions between lines and tunnels to be +# accomplished whilst assuring that no patients get lost. + +# To recap, we have a tunnel state that changes in duration every cycle, and in +# mortality rate as well (so that each cohort has its own unique OS line!) + +# So let's start with tp, the set of TPs we made earlier that goes to the TH + +# For each model cycle we need to make a vector of EXPANDED TPMs that has TH elements in it. +# This should have a tunnel which is max_tun_len in length, but has TP of 0 to the +# tunnel cycles which lie beyond the extent for that cohort's tunnel. To illustrate, +# let's imagine the following: +# +# - Patients in cycle 1 have a tunnel between 1 and 2 that is 2 cycles in length +# - patients in cycle 2 have a tunnel between 1 and 2 that is 3 cycles in length +# - the value of max_tun_len is 4 cycles (i.e. the longest tunnel in the time horizon) +# +# So, for our cycle 1 people, we have to make a tunnel with 4 cycles, but make it +# impossible to go from tun_line_1_2 to tun_line_1_3. +# +# We actually do not NEED to set the probabilities in subsequent cycles to 0, we +# just need to make it impossible to continue down the tunnel. this means simply +# replacing the 1-death probability of going to the next tunnel cycle with 0 and +# put that number in the probability to go to the next treatment line! +# +# + +# As we have different lengths of tunnel and different mortality within tunnel, +# we can now generate our huge blob of expanded TPMs! +# +# Here, let's put in our tunnel between lines 3 and 4. this lets enough patients +# slip through other states so that they're not ALL going in the tunnel. + +ex_tun_pos <- 3 + +tp_tun_vary <- lapply(1:TH, function(this_cycle_cohort) { + + # remember that here we're going to generate TH matrices for EACH cycle cohort! + + # Extend the length of the tunnel to match the max and kill everyone that's doing + # the impossible - this will mean that if it's wrong, the rows won't sum to 1 at + # the end! + + # The length of the tunnel for this cohort in cycles. This lets us know where + # to move the exit probability in the matrix from. Where to move it to is a function + # of this value AND the positioning of the tunnel (i.e. tun_pos when expanding) + this_tun_mort <- l_tun_os[[this_cycle_cohort]] + + # So, we have a list of vectors. Each one of these is the conditional death probability + # patients inside the tunnel face at each model cycle. This can count back + # because when this list was made it can be made to count back, so it's all + # led by the data by this point. + + lapply(this_cycle_cohort:TH, function(this_cohort_this_cycle) { + + # For this cycle for this cohort, Expand the tp matrix (which ONLY depends on t) + # and insert the correct tunnel mortality estimates (which depend on t and t|t_tun) + + which_tun_mort <- this_cohort_this_cycle - this_cycle_cohort + 1 + + tpm <- f_mk_FixedTunnelMarkovExpand( + tpm = tp[[this_cohort_this_cycle]], # for this model cycle (for this absolute t) + tun_pos = ex_tun_pos, + tun_len = max_tun_len, + tun_mort = this_tun_mort[[which_tun_mort]] + ) + + # Right, now we have a TPM which kills everyone when they get to the nth cycle + # inside the tunnel, if the tunnel length is less than max_tun_len. Therefore + # we simply need to make the probability of going into that next tunnel cycle + # 0, and the probability of going out of the tunnel to the next treatment line + # equal to the probability that we did have for going to the next tunnel cycle. + # + # To simplify to mechanics + # - the row number is ex_tun_pos + # - the origin column is ex_tun_pos + length(this_tun_mort[[this_cohort_this_cycle]]) + # - the destination column is ex_tun_pos + max_tun_len + 1 + + tun_len_this_cohort <- vec_tun_len[this_cycle_cohort] + + if (max_tun_len - tun_len_this_cohort > 0) { + row <- ex_tun_pos + tun_len_this_cohort + col_from <- ex_tun_pos + tun_len_this_cohort + 1 + col_to <- ex_tun_pos + max_tun_len + 1 + + p_ft <- tpm[row,col_from] + tpm[row,col_from] <- 0 + tpm[row,col_to] <- p_ft + } + + + # Great, so it's now impossible to go too far into the tunnel as the transition + # into that tunnel cycle (and therefore any that follow it) has 0% probability. + # Patients that would have gone into that next tunnel (i.e. haven't died) are + # now going out of the tunnel directly to the next treatment line. + + return(tpm) + + }) + +}) + + +# Simplified version for programming -------------------------------------- + +# Version with cycle after line 1 for varying cycles. easier to run through +# stuff if it's from the first state: + +ex_tun_pos <- 1 +tp_tun_vary <- lapply(1:TH, function(this_cycle_cohort) { + this_tun_mort <- l_tun_os[[this_cycle_cohort]] + lapply(this_cycle_cohort:TH, function(this_cohort_this_cycle) { + which_tun_mort <- this_cohort_this_cycle - this_cycle_cohort + 1 + tpm <- f_mk_FixedTunnelMarkovExpand( + tpm = tp[[this_cohort_this_cycle]], # for this model cycle (for this absolute t) + tun_pos = ex_tun_pos, + tun_len = max_tun_len, + tun_mort = this_tun_mort[[which_tun_mort]] + ) + tun_len_this_cohort <- vec_tun_len[this_cycle_cohort] + if (max_tun_len - tun_len_this_cohort > 0) { + row <- ex_tun_pos + tun_len_this_cohort + col_from <- ex_tun_pos + tun_len_this_cohort + 1 + col_to <- ex_tun_pos + max_tun_len + 1 + + p_ft <- tpm[row,col_from] + tpm[row,col_from] <- 0 + tpm[row,col_to] <- p_ft + } + return(tpm) + }) +}) + +tun_from_state <- "line_1" +tun_entry_state <- "tun_line_1_1" + +empty_row <- tp_tun_vary[[1]][[1]][1,] +empty_row <- sapply(empty_row,function(x) 0) + +bl <- tp_tun_vary[[1]][[1]][1,] +bl <- sapply(bl,function(x) 0) +bl[1] <- 1 + +# calculate the population that never goes in the tunnel. +trace_no_tun <- Reduce( + x = 1:(TH-1), + accumulate = TRUE, + init = bl, + f = function(prev,cyc) { + this_tp <- tp_tun_vary[[1]][[cyc]] + this_tp[tun_from_state,tun_entry_state] <- 0 + prev %*% this_tp + } +) +trace_no_tun <- matrix( + unlist(trace_no_tun), + nrow = TH, + byrow = TRUE, + dimnames = list(NULL,dimnames(tp_tun_vary[[1]][[1]])[[2]]) +) + +non_tun_pop <- 1-rowSums(trace_no_tun) +tun_entry <- non_tun_pop - shift(non_tun_pop,1,0) + +empty_trace <- matrix( + rep(0,TH*length(bl)), + nrow = TH, + dimnames = list(NULL,names(bl)) +) + +# This is the tricky-dicky bit - we create TH traces, one for each cohort +# entering the tunnel, and then we extrapolate the state residency of each one +# of them one by one +tun_traces <- lapply(1:TH, function(cyc) { + pop <- empty_trace + pop[cyc,tun_entry_state] <- tun_entry[cyc] + pop +}) + +# Now, we have the trace without the tunnel state in it (losing people), and +# individual traces for every entry point to the tunnel. We can now apply the +# correct TPMs to each of these traces to calculate them. the sum of all the matrices +# is then the full trace with varying-length-time-varying tunnel state! + +tun_list <- lapply(1:TH, function(tunnel_entry_time) { + + # pull out the cohort that entered the tunnel at cycle tunnel_entry_time + cohort <- tun_traces[[tunnel_entry_time]] + + if (tunnel_entry_time == TH) return(cohort) + + # Calculate a trace for this population, using tp_tun_vary[[tunnel_entry_time]][1:(TH-tunnel_entry_time)] + Reduce( + x = 1:(TH-tunnel_entry_time), + init = cohort, + accumulate = FALSE, + f = function(p, c) { + p[tunnel_entry_time + c,] <- p[tunnel_entry_time + c - 1,] %*% tp_tun_vary[[tunnel_entry_time]][[c]] + p + } + ) +}) + +# Unit test - all summing to 1???? +rowSums(Reduce(`+`,tun_list)) + rowSums(trace_no_tun) + +# I....I think I did it....I think I just solved time varying duration varying tunnel states... + +trace_list <- list( + trace_no_tun, + tun_list +) + +full_trace <- Reduce(`+`,tun_list) + trace_no_tun + +rowSums(full_trace) + +full_trace <- as.data.table(full_trace) +full_trace$t <- 0:(nrow(full_trace)-1) +full_trace <- melt(full_trace,id.vars = "t",variable.name = "state", value.name = "pop") +ggplot(full_trace, aes(x = t, y = pop, colour = state)) + + geom_line() + + theme_classic() + + theme(legend.position = "bottom") + + + + +# ~~ Two tunnels? --------------------------------------- + +# ok this is all well and good for 1 tunnel, but what about when there are further tunnels, which also vary by +# duration depending on cycle of entry, and also have OS depending on cycle of entry? What do we do then? +# +# We might have to go row by row: + + +# Let's use tp again so we have time varying TPMs + +str(tp,list.len = 2) + +# Let's imagine we have 2 tunnels, one varying between 1 and 4 cycles, and the other +# varying between 2 and 8 cycles in length, depending on the cycle number of entry, +# which I'm going to call e forthwith. +tun_len <- list( + sample(1:4,TH,TRUE), + sample(2:8,TH,TRUE) +) + +# We want the ORIGINAL position of each tunnel. To test robustness, we're going to say +# that the tunnels are upon exiting 1L and 3L: +tun_pos <- list(1,3) + + +# 2 layers of nesting here. first is the tunnel, t, and the other is cycle of entry e +# h_tun[[t]][[e]] is the hazard to be applied to the cohort that entered tunnel t +# at cycle e, for each cycle inside the tunnel, d. To recap: +# +# - c is model cycle +# - t is tunnel +# - e is cycle of entry into tunnel (e.g. cohort entered tunnel t in c = 2, therefore e = 2) +# - d is time inside tunnel (e.g. cohort entered tunnel in cycle 2, and it is now cycle 5, d = 3) +# +# The 1's are added to keep them all the same length - this helps when creating the TPMs, otherwise +# the population vectors produced at each model cycle will have different lengths, which will make +# it more annoying to bind it all together. Better to keep it tidy in this way instead! +# +# Obviously, these are just beta draws and instead will have to come from some data +# or analysis for the CE model. This could potentially be slices from an OS extrapolation +# for being between treatment lines (censoring for going onto next treatment to isolate +# the hazard of just death whilst waiting for next treatment line) +# +# if, for example the expected time between discontinuing 1L and starting 2L is +# 5 weeks for those discontinuing in cycle 1, then use the hazard for those 5 weeks starting +# from c=5? Hazard for those with e=6 should be h(death | t = 1, c = 6, e = 6) then +# h(death | t = 1, c = 7, e = 6) and so on to make the OS line for the cohort with +# that value of e, correct? We would then get (TH-1) OS lines per t, like the below: +# +h_tun <- lapply( + X = tun_len, + FUN = function(D) { + lapply(1:length(D), function(c) { + d_given_te <- D[c] + tp_i <- rbeta(d_given_te,floor(rnorm(1,10,2)),floor(rnorm(1,100,2))) + return(c(tp_i, rep(1,max(D) - d_given_te))) + }) + } +) + + +# Now we perform the transition probability matrix expansion. To walk through it: +# +# - the cohort with e=1 has TH TPMs, each applying the mortality in h_tun for e=1 (i.e. lapply(h_tun, function(x) x[[1]])) +# - the cohort with e=2 has TH-1 TPMs, each applying the mortality in h_tun for e=2 (i.e. lapply(h_tun, function(x) x[[2]])) +# - ... +# - the cohort with e=TH-1 has 1 TPM, applying the mortality in h_tun for e=TH-1 (i.e. lapply(h_tun, function(x) x[[TH]])) +# +# In that sense, it's kind of like a pyramid or a right-angled triangle facing point down, with the +# number of TPMs required reducing as we go further into the model +# + +n_tun <- length(tun_pos) + +# Work out what the final positions in the TPMs will be for our 2 tunnels +tun_positions <- Reduce( + x = 1:length(tun_pos), + init = tun_pos, + accumulate = FALSE, + f = function(prev, tun) { + if (tun == 1) { + return(prev) + } else { + max_len <- max(tun_len[[tun-1]]) + prev[[tun]] <- prev[[tun - 1]] + max_len + tun_pos[[tun]] - 1 + return(prev) + } + } +) + + +tp_triangle <- lapply(1:TH, function(e) { + + # Get mortality and tunnel length for cohorts entering tunnels at cycle c (i.e. where e = c) + this_tun_mort <- lapply(h_tun, function(t) t[[e]]) + this_tun_len <- lapply(tun_len, function(t) t[[e]]) + + # Now, d is between e and the time horizon TH. This is our iterator, as it tells us + # which TPM to use (i.e. transitions for non tunnel states for cycle c) + d <- e + 0:(TH - e - 1) + + # Now, iterating along d, take the correct TPM, and expand it to include all the + # tunnels, applying this_tun_mort and making the tunnel exit adjustment for this_tun_len. + # This will result in length(d) TPMs, all for the correct c and e for all t. Note, The + # index is actually c here as it's that absolute time step in the model! + lapply(d, function(c) { + Reduce( + x = 1:length(this_tun_mort), + init = tp[[c]], + accumulate = FALSE, + f = function(prev_tpm, tun) { + + # expand the tpm for model cycle c using mortality for entry cycle e along d + tpm_expanded <- f_mk_FixedTunnelMarkovExpand( + tpm = prev_tpm, + tun_pos = tun_positions[[tun]], + tun_len = max(tun_len[[tun]]), + tun_mort = this_tun_mort[[tun]] + ) + + # Correct the matrix to make patients exit the tunnel corresponding to + # d given e. Identify the row we're changing & which column is going where. + row_from_to <- tun_positions[[tun]] + this_tun_len[[tun]] + col_from <- tun_positions[[tun]] + this_tun_len[[tun]] + 1 + col_to <- tun_positions[[tun]] + max(tun_len[[tun]]) + 1 + + # Make the changes to the TPM + tpm_expanded[row_from_to,col_to] <- tpm_expanded[row_from_to,col_from] + tpm_expanded[row_from_to,col_from] <- 0 + + # it is now impossible to go beyond the end of the tunnel, and instead + # patients that survive will go to the next treatment line. the Reduce + # will then add the next tunnel state if there are any + return(tpm_expanded) + } + ) + }) +}) + +# To illustrate - the length of the list for each e goes down by 1 +unlist(lapply(tp_triangle,length)) + + +# tp_triangle has TH-1 TPMs for the first e, TH-2 for the 2nd and so on. + +empty_pop <- unlist(lapply(as.list(tp_triangle[[1]][[1]][1,]),function(x) 0)) +bl_pop <- empty_pop +bl_pop[1] <- 1 + +empty_trace <- matrix( + data = rep(empty_pop,TH), + nrow = TH, + byrow = TRUE, + dimnames = list(NULL,names(empty_pop)) +) + +bl_trace <- empty_trace +bl_trace[1,1] <- 1 + +# Right, let's try to apply these tpms now. The way you do this is you separate out +# tunnel entrants from everyone else, and put the tunnel entrants into their +# e-based slot. To calculate a few of them through: + +# apply the TPM for e=1 c=1 +pop_c2 <- bl_pop %*% tp_triangle[[1]][[1]] + +# These are people going into the tunnels at cycle 2 so e is 2 (i.e. c + 1) +e <- 2 + +# Feed the tunnel entrants for this e into their space for trace: +pop_e2 <- empty_trace +pop_e2[e,unlist(tun_positions)+1] <- pop_c2[unlist(tun_positions)+1] + +# Patients entering either tunnel with e of 2 can now have a trace calculated using Reduce: + +tr_e2 <- rbind( + do.call(rbind, lapply(1:(e - 1), function(c) empty_pop)), + do.call( + rbind, + Reduce( + x = (e + 1):TH, + init = pop_e2[e, ], + accumulate = TRUE, + f = function(tr, cyc) { + tr %*% tp_triangle[[1]][[cyc - e]] + } + ) + ) +) + +# Right, that's the trace for people entering tunnels in cycle 2. let's check we're not +# losing anyone: + +rowSums(tr_e2) + +# Great, so we're not losing anyone and we're tracing through cohort e=2, through +# all tunnels. + +# Lovely. so essentially all we have to do now is repeat the above for c = 1:TH to get +# TH Markov traces. Each time, we get our new cohort, "split them off" from the rest, +# calculate the trace for that cohort, and move onto the next cycle c. We should end up with +# TH traces, one for those that never go in a tunnel and TH-1 for people going in tunnels +# at different times +# + + + + + + + + + +# So, as before we've got a nested list with the following levels: +# +# 1. Cycle that the cohort ENTERS the tunnel, cyc +# 2. Cycles since that cohort that ENTERED the tunnel at cyc have BEEN in the tunnel +# +# l_tun_os2[[1]][[1]] Entered the tunnel in cycle 1, first cycle in the tunnel +# l_tun_os2[[2]][[1]] Entered the tunnel in cycle 2, first cycle in the tunnel +# l_tun_os2[[5]][[3]] Entered the tunnel in cycle 5, 3rd cycle in the tunnel +# +# + + + + +# Turning the example into a function ------------------------------------- + +# So we've cracked it I think withouth making any assumptions about where the tunnel +# state is or how long it is. now we have two challenges remaining: +# +# 1. turn it into a function +# 2. make it cope with multiple tunnel states +# +# + + + +#' Function to compute transition probability matrices including a varying-duration +#' tunnel state which can then be used with another function to calculate the required traces +#' +#' @param list_tun_os a list of lists of mortality vectors. The top level is tunnel number, the 2nd level is entry cycle, the 3rd level is cycles since tunnel entry +#' @param tpm_list transition probability matrix list BEFORE tunnels inserted - one for each model cycle, or list of repeated if time invariant +#' @param init_pop (optional) initial population. will be assumed 100% in first cycle if left blank +#' +f_mk_varyTunneltpms <- function(list_tun_os, tpm_list, tun_position, vec_tun_len, init_pop = NULL) { + + # Step 1 - generate the set of TPMs taking tunnel mort given time and time in tun + + TH <- length(vec_tun_len) + + if(is.null(init_pop)) { + init_pop <- c(1,rep(0,dim(tpm_list[[1]])[2]-1)) + names(init_pop) <- dimnames(tpm_list[[1]])[[2]] + } + + # The first layer of nesting is the time of entry to the tunnel, and the second layer + # is tinme since entering the tunnel. + + tp_tun_vary <- lapply(1:TH, function(this_cycle_cohort) { + + # For this entry time to the tunnel, repeatedly expand the TPM to take into + # account all tunnel states: + this_tun_mort <- l_tun_os[[this_cycle_cohort]] + lapply(this_cycle_cohort:TH, function(this_cohort_this_cycle) { + which_tun_mort <- this_cohort_this_cycle - this_cycle_cohort + 1 + tpm <- f_mk_FixedTunnelMarkovExpand( + tpm = tp[[this_cohort_this_cycle]], # for this model cycle (for this absolute t) + tun_pos = ex_tun_pos, + tun_len = max_tun_len, + tun_mort = this_tun_mort[[which_tun_mort]] + ) + tun_len_this_cohort <- vec_tun_len[this_cycle_cohort] + if (max_tun_len - tun_len_this_cohort > 0) { + row <- ex_tun_pos + tun_len_this_cohort + col_from <- ex_tun_pos + tun_len_this_cohort + 1 + col_to <- ex_tun_pos + max_tun_len + 1 + + p_ft <- tpm[row,col_from] + tpm[row,col_from] <- 0 + tpm[row,col_to] <- p_ft + } + return(tpm) + }) + }) + + return(tp_tun_vary) + +} + + + +# ~ Testing area ---------------------------------------------------------- + +TH <- 100 + +state_names <- c(paste0("line_",1:5),"bsc","dead") + +bl_pop <- c(1,rep(0,length(state_names)-1)) +names(bl_pop) <- state_names + +tp_1L <- rdirichlet(TH,c(95,2 ,0 ,0 ,0 ,3 ,4)) +tp_2L <- rdirichlet(TH,c(0 ,93,6 ,0 ,0 ,3 ,5)) +tp_3L <- rdirichlet(TH,c(0 ,0 ,92,5 ,0 ,3 ,6)) +tp_4L <- rdirichlet(TH,c(0 ,0 ,0 ,91,3 ,3 ,7)) +tp_5L <- rdirichlet(TH,c(0 ,0 ,0 ,0 ,90,3 ,8)) +tp_BSC <- rdirichlet(TH,c(0 ,0 ,0 ,0 ,0 ,85,9)) +tp_dead <- c(0, 0, 0, 0, 0, 0, 1) + +# List of TPMs with some random variation cycle to cycle to simulate time-varying +# transition probabilities between states. Patients can't go backwards in line, and +# can't skip lines except for going straight to bsc (i.e. choosing no treatment/palliative care) +tp <- lapply(1:TH, function(cycle) { + matrix( + c(tp_1L[cycle,], + tp_2L[cycle,], + tp_3L[cycle,], + tp_4L[cycle,], + tp_5L[cycle,], + tp_BSC[cycle,], + tp_dead + ), + nrow = 7, + byrow = TRUE, + dimnames = list(state_names,state_names) + ) +}) + +# some example of the tunnel length vector. This could be derived as floor(TTNT - TTDisc)? +vec_tun_len <- vec_tun_len + +tun_positions <- 1 + + +# Some random numbers for the tunnel death probs to show it varying over time. Note that +# the length goes down as the cycle goes up because there's no point in having numbers +# that go past the time horizon. +# +# The reason it's in an extra layer of list is to illustrate that you will have data +# like this PER TUNNEL STATE. This example is just 1 tunnel. + +l_tun_os <- lapply(1:TH, function(cyc) { + # Make a matrix which contains our (example) random numbers for probability of death + # in tunnel. obviously these will be different when we're using real data. Transpose + # it so we can split it by row afterwards. Speeds things up later so we may as well + # do it now. + + tun_len_this_cohort <- vec_tun_len[cyc] + + tp_i <- t(matrix( + data = rbeta((TH - cyc + 1) * tun_len_this_cohort,10,100), + nrow = TH - cyc + 1, + ncol = tun_len_this_cohort, + byrow = TRUE, + dimnames = list(NULL,NULL) + )) + tp_i <- split(tp_i, rep(1:ncol(tp_i), each = nrow(tp_i))) + names(tp_i) <- NULL + + # extend the values with 100% death probability to capture the impossible people + # who shouldn't be there anyway. This way will force rowsums to be + + if (max_tun_len - tun_len_this_cohort > 0) { + tun_ext_len <- rep(1,max_tun_len - tun_len_this_cohort) + return(lapply(tp_i, function(this_cycle_this_cohort) c(this_cycle_this_cohort, tun_ext_len))) + } else { + return(tp_i) + } + }) + + + +tp_tun1 <- f_mk_varyTunneltpms( + list_tun_os = l_tun_os, + tpm_list = tp, + tun_position = 1, + vec_tun_len = vec_tun_len +) + +# Right - so to add multiple tunnels like this, you need: +# +# A list of mortality for each tunnel state +# +# ...I think that's it? +# +# + +# Just applying the same mortality for now to avoid extra lines for no reason +l_tun_os2 <- l_tun_os + + + + +# Change of tac ----------------------------------------------------------- + +# ok, so upon discussion with Dawn Lee, it's going to be a massive matrix +# which takes the TPs for each cycle given cycles in tunnel and places them in +# a square matrix with dimensions (Ntun * TH_in_cycles) + Nstates. So, if there's +# 4 treatment lines, 3 tunnels between them, BSC+death, weekly CL and 40 year TH, +# the dimensions of the matrix are (3 * (40*52)) + 1 + 1 = (6242X6242) FOR EACH CYCLE, +# so for each treatment pathway we end up with TH 6242x6242 matrices, which then +# go into a Reduce to calculate the trace, and then get discarded. + + +# size_of_mat_in_MB <- format(object.size(matrix( +# rep(runif(6242*6242)), +# nrow=6242, +# ncol=6242 +# )), units="MB", standard="SI") + +# Which results in 311.7 MB of RAM per matrix. 311.7 * 52 * 40 = 648,336 MB (648 GB) of RAM +# requirement to compute the matrices. Obviously this is assuming that there are no 0s, +# which might reduce burden. Let's try it again with a diagonal matrix + +# size_of_diag_in_MB <- format(object.size(diag(runif(6242))), units="MB", standard="SI") + +# Sadly it's the same amount of RAM...Let's check the size of it actually IN RAM: + +# M <- diag(runif(6242)) + +# Yep, 311MB of RAM per matrix. +# +# library(biganalytics) +# +# M <- as.big.matrix(M) +# +# rep(1,6242) %*% M +# +# Pt <- t(as.matrix(rep(1,6242))) +# +# # Calculate the next row using M and : +# PM <- bigalgebra::dgemm(A = Pt,B = M) +# PM2 <- bigalgebra::dgemm(A = PM,B = M) +# PM3 <- bigalgebra::dgemm(A = PM2,B = M) + +# This would work but would obviously slow things down. + + +state_names <- state_names[c(1:4,6:7)] + + +tp_1L <- rdirichlet(TH,c(95,2 ,0 ,0 ,1 ,4)) +tp_2L <- rdirichlet(TH,c(0 ,93,6 ,0 ,1 ,5)) +tp_3L <- rdirichlet(TH,c(0 ,0 ,92,5 ,1 ,6)) +tp_4L <- rdirichlet(TH,c(0 ,0 ,0 ,91,1 ,7)) +tp_5L <- rdirichlet(TH,c(0 ,0 ,0 ,0 ,90,8)) +tp_BSC <- rdirichlet(TH,c(0 ,0 ,0 ,0 ,88,9)) +tp_dead <- c(0, 0, 0, 0, 0, 1) + +# List of TPMs with some random variation cycle to cycle to simulate time-varying +# transition probabilities between states. Patients can't go backwards in line, and +# can't skip lines except for going straight to bsc (i.e. choosing no treatment/palliative care) +tp <- lapply(1:TH, function(cycle) { + matrix( + c(tp_1L[cycle,], + tp_2L[cycle,], + tp_3L[cycle,], + tp_4L[cycle,], + tp_BSC[cycle,], + tp_dead + ), + nrow = length(state_names), + byrow = TRUE, + dimnames = list(state_names,state_names) + ) +}) + + +# Load in the TPs and tidy columns up +TPs <- openxlsx::read.xlsx("./1_Data/TP matrix expansion toy example.xlsm",sheet = "Sheet3") +TPs <- as.data.table(TPs) +colnames(TPs) <- c("c", unlist(lapply(1:4, function(x){paste0("L",x,c("_disc", "_next", "_death"))})),"NT_death") + + +TP1 <- matrix(nrow=10,ncol=10) +TP1[is.na(TP1)] <- 0 + +cyc <- as.numeric(TPs[1,]) +nc <- dim(TP1)[2] + +TP1[1,1] <- 1-sum(cyc[2:4]) +TP1[1,c(2:3,nc)] <- cyc[2:4] + +TP1[2,2] <- TP1[1,1] + TP1[1,2] +TP1[2,c(3,nc)] <- TP1[1,c(3,nc)] + +c3 <- c(4:5,nc) +TP1[3,3] <- 1-sum(cyc[5:7]) +TP1[3,c3] <- cyc[5:7] + +TP1[4,4] <- TP1[3,3] + TP1[3,4] +TP1[4,c(5,nc)] <- TP1[3,c(5,nc)] + +c5 <- c(6:7,nc) +TP1[5,5] <- 1-sum(cyc[8:10]) +TP1[5,c5] <- cyc[8:10] + +TP1[6,6] <- TP1[5,5] + TP1[5,6] +TP1[6,c(7,nc)] <- TP1[5,c(7,nc)] + +c7 <- c(8:9,nc) +TP1[7,7] <- 1-sum(cyc[11:13]) +TP1[7,c7] <- cyc[11:13] + +TP1[8,8] <- TP1[7,7] + TP1[7,8] +TP1[8,c(9,nc)] <- TP1[7,c(9,nc)] + +TP1[9,9] <- 1-cyc[14] +TP1[9,10] <- cyc[14] + +TP1[nc,nc] <- 1 + +# Test: +all(round(rowSums(TP1),12)==1) + + + +# ~ repeating the VBA provided -------------------------------------------- + +# Three macros are provided, one to create the empty matrix, one to populate it +# and one to test it. let's replicate that action. +# +# The action is essentially to insert a set of diagonal matrices into the full matrix. +# These should have NO overlap with each other as it should be impossible to transition +# between them +# + + + + +# Create matrix outline + +TH <- nrow(TPs) + +# Define all the labels for the rows and columns +labs <- c( + "L1_on", + "L1_off", + paste0("L2_on_c" ,1:TH), + paste0("L2_off_c",1:TH), + paste0("L3_on_c" ,1:TH), + paste0("L3_off_c",1:TH), + paste0("L4_on_c" ,1:TH), + paste0("L4_off_c",1:TH), + paste0("BSC_c" ,1:TH), + "dead" +) + +len_m <- length(labs) + +# Let's start with a diagonal matrix - 100% chance of staying in the same state for +# all states. +M <- diag(nrow=len_m) +M[row(M) == col(M)] <- 0 +dimnames(M) <- list(labs,labs) +# all(rowSums(M) == 0) # test that it's full of zeros - it is a lot faster to do this +# than it is to make a matrix full of NAs and replace all NAs (the default) with 0s +M[len_m,len_m] <- 1 + +# Size of this matrix in MB - not actually that bad, it's only 1.6GB of RAM! +# Make sure to only use this method when there's lots of RAM to spare, or it could error out. +format(object.size(M),units = "GB") + +# Rows 3 onward in this matrix are FIXED. once this matrix is populated, the values +# in the following cells can simply be replaced and the vector of state residency +# can then be multiplied by the updated matrix to produce the next state residency. +# +# This is a huge efficiency gain and ensures that all transitions are taken into +# account. If the matrix gets really really big, we can use the "bigmemory" package +# which can perform matrix multiplication of files using C++ directly. This performs +# the operation on disk so you're only limited by your HDD. +# +# Rows 1 and 2 control line 1, and are simply replaced with movement probabilities +# from the associated row of TPs above each cycle. the rest of the matrix +# is populated systematically once. + +gc() + +# Let's have a bit of a sneak peak before we get started. a big square of zero values. +f_mat_firstFewCells <- function(mat, n=10) {mat[1:n,1:n]} + +f_mat_firstFewCells(M) + +# I programmed a function based on an excellent stack exchange answer, credit to David Arenburg at +# https://stackoverflow.com/questions/28746023/how-to-insert-values-from-a-vector-diagonally-into-a-matrix-in-r +# which taught me that one can insert values into an existing matrix in a vectorised manner +# by supplying a matrix of co-ordinate values to the square brackets, like so: +# +# A[co_ord_matrix] <- b +# +# where A is a matrix (not necessarily square), and b is a vector whcih must of course +# fit inside the matrix. the co_ord_matrix must be 2 columns and must contain +# integer values. +# +# Function is called f_vec_in_mat_diag and it is a huge time and effort saver here +# because we only need to know the top left element location for each vector +# to insert to the big matrix! +# +# + +source("./3_Functions/misc/matrix.R") + +# For example, in our case, the "stay in tunnel" probabilities for line 2 on treatment +# tunnel start from element 3,4 in M. to demonstrate with some dummy data: + +f_vec_in_mat_diag( + A = matrix(nrow = 10,ncol = 10), + b = 1:5, + start_row = 3, + start_col = 4, + direction = "se" +) + +# As the values inside of the matrix all come from or are derived from the +# table TPs, all I need to know is positions within the matrix (top left cells only). + + +# So these are the top left cells for each tunnel + +n_tun_excluding_bsc <- 3 +lab_co_ord <- unlist(lapply(1:n_tun_excluding_bsc, function(tun) { + line_on <- paste0("tun ",tun+1,"L on") + line_on <- paste0(line_on,c(" stay", " stop", " next")) + line_off <- paste0("tun ",tun+1,"L off") + line_off <- paste0(line_off,c(" stay", " next")) + c(line_on, line_off) +})) + +lab_co_ord <- c(lab_co_ord, "tun BSC survive") + +# the rows and columns in the workbook and found these are numbered manually. There +# are gaps of 1 row or column in there (row sums to 0 in excel). +tl_cells <- matrix( + c( + 3 , 4 , # tun 2L on stay + 3 , 2084 , # tun 2L on stop + 3 , 4163 , # tun 2L on next + 2083 , 2084 , # tun 2L off stay + 2083 , 4163 , # tun 2L off next + + 4163 , 4164 , # tun 3L on stay + 4163 , 6244 , # tun 3L on stop + 4163 , 8323 , # tun 3L on next + 6243 , 6244 , # tun 3L off stay + 6243 , 8323 , # tun 3L off next + + 8323 , 8324 , # tun 4L on stay + 8323 , 10404, # tun 4L on stop + 8323 , 12483, # tun 4L on next + 10403, 10404, # tun 4L off stay + 10403, 12483, # tun 4L off next + + 12483, 12484 # tun BSC survive + ), + nrow = (n_tun_excluding_bsc*5 + 1), + byrow = TRUE, + dimnames = list(lab_co_ord,c("row", "col")) +) + +# now, we could systematically derive these using the time horizon TH and the +# non-time-horizon bounded rows (i.e. the first 2). something like: + +nt <- 2 + +# For each tunnel except the last one which has no adjoining tunnel, there are +# 4 transitions, stay, discontinue, next line and death whilst in the on-treatment tunnel. +# there are 3 transitions (stay, next, death) when in the off-treatment tunnel. +# Death transitions are always in the final column of the matrix, so can be +# handled separately. we're only concerned with the diagonal bits for now! +# +# As a toy example, let's calculate the rows and columns above using parameters: +# +# tun_n, the tunnel number (starting from 1) +# nt non tunnel rows (always the number of rows of non-tunnel states that come +# before the first tunnel in the series) +# TH time horizon in cycles +# TPs our table of transition probabilities + +# First tunnel: +tun_n <- 1 + +tun_r_on <- sum(nt , ((2*TH) * (tun_n-1)), 1) +tun_c_on_stay <- sum(nt + 1, ((2*TH) * (tun_n-1)), 1) +tun_c_on_stop <- tun_c_on_stay + TH +tun_c_on_next <- tun_c_on_stop + TH - 1 +tun_r_off <- tun_c_on_stop - 1 +tun_c_off_stay <- tun_c_on_stop +tun_c_off_next <- tun_c_on_next + +# Checked and these match + + +# second tunnel: +tun_n <- 2 + +tun_r_on <- sum(nt , ((2*TH) * (tun_n-1)), 1) +tun_c_on_stay <- sum(nt + 1, ((2*TH) * (tun_n-1)), 1) +tun_c_on_stop <- tun_c_on_stay + TH +tun_c_on_next <- tun_c_on_stop + TH - 1 +tun_r_off <- tun_c_on_stop - 1 +tun_c_off_stay <- tun_c_on_stop +tun_c_off_next <- tun_c_on_next + + +# Looks good. Let's formalise into a second function which gets our upper left +# element indices for us, given a tunnel and a time horizon: +# +# ASSUMES THE TUN N IS FOR THE NEXT LINE, tun_n 1 is 2L etc +# + +f_diag_mat_topleftFinder <- function(tun_n, TH, nt) { + + # Calculate the indices + tun_r_on <- sum(nt , ((2*TH) * (tun_n-1)), 1) + tun_c_on_stay <- sum(nt + 1, ((2*TH) * (tun_n-1)), 1) + tun_c_on_stop <- tun_c_on_stay + TH + tun_c_on_next <- tun_c_on_stop + TH - 1 + tun_r_off <- tun_c_on_stop - 1 + tun_c_off_stay <- tun_c_on_stop + tun_c_off_next <- tun_c_on_next + + # Put them in a matrix that can be used for co-ordinates in f_vec_in_mat_diag + outMat <- matrix( + c( + tun_r_on , tun_c_on_stay, + tun_r_on , tun_c_on_stop, + tun_r_on , tun_c_on_next, + tun_r_off, tun_c_off_stay, + tun_r_off, tun_c_off_next + ), + nrow = 5, + byrow = TRUE, + dimnames = list( + c( + paste0(paste0("tun ",tun_n + 1,"L on "),c("stay","stop","next")), + paste0(paste0("tun ",tun_n + 1,"L off "),c("stay","next")) + ), + c("row", "col") + ) + ) + return(outMat) +} + +# updated version with naming of the rows that lines up with the columns in tp +# defined during f_seq_tpm_compiler +f_diag_mat_topleftFinder2 <- function(tun_n, TH, nt) { + + # Calculate the indices + tun_r_on <- sum(nt , ((2*TH) * (tun_n-1)), 1) + tun_c_on_stay <- sum(nt + 1, ((2*TH) * (tun_n-1)), 1) + tun_c_on_stop <- tun_c_on_stay + TH + tun_c_on_next <- tun_c_on_stop + TH - 1 + tun_r_off <- tun_c_on_stop - 1 + tun_c_off_stay <- tun_c_on_stop + tun_c_off_next <- tun_c_on_next + + # Put them in a matrix that can be used for co-ordinates in f_vec_in_mat_diag + outMat <- matrix( + c( + tun_r_on , tun_c_on_stay, + tun_r_on , tun_c_on_stop, + tun_r_on , tun_c_on_next, + tun_r_off, tun_c_off_stay, + tun_r_off, tun_c_off_next + ), + nrow = 5, + byrow = TRUE, + dimnames = list( + c( + paste0(paste0("L",tun_n + 1,"_"),c("stay","disc","next"),"_on"), + paste0(paste0("L",tun_n + 1,"_"),c("stay","next"),"_off") + ), + c("row", "col") + ) + ) + return(outMat) +} + + +# Now we can much more easily get our top lefts for all our diagonals: + +tl_tun1 <- f_diag_mat_topleftFinder( + tun_n = 1, + TH = TH, + nt = 2 +) + + + +# We can then cycle down this list putting values into M using our function. +# For instance, let's do the "stay" probability for the first tunnel, which is +# derived by whats left in TPs: + +TPs$L1_stay <- 1-rowSums(TPs[,list(L1_disc, L1_next, L1_death)]) +TPs$L2_stay <- 1-rowSums(TPs[,list(L2_disc, L2_next, L2_death)]) +TPs$L3_stay <- 1-rowSums(TPs[,list(L3_disc, L3_next, L3_death)]) +TPs$L4_stay <- 1-rowSums(TPs[,list(L4_disc, L4_next, L4_death)]) +TPs$BSC_death <- 1-TPs$BSC_stay + +# CHECK: sum to 1 +all(round(TPs$L1_stay + TPs$L1_disc + TPs$L1_next + TPs$L1_death,12) == 1) +all(round(TPs$L2_stay + TPs$L2_disc + TPs$L2_next + TPs$L2_death,12) == 1) +all(round(TPs$L3_stay + TPs$L3_disc + TPs$L3_next + TPs$L3_death,12) == 1) +all(round(TPs$L4_stay + TPs$L4_disc + TPs$L4_next + TPs$L4_death,12) == 1) +all(round(TPs$BSC_stay + TPs$BSC_death,12) == 1) + + +# Let's try and do the whole first tunnel state with the first cycle +# probabilities for 1L treatment as well, and with mortality + +# 1L TPM for cycle 1. Let's just calculate L1 stay to avoid having to do it again and again + + +# Now pull out the right TPs for the first cycle, and taking the same strategy as +# for the diagonal values, apply the values all at the same time: +# +# Remember, this is for 1L for each cycle +cyc_prob <- as.list(TPs[1,]) +vals_1L <- matrix( + c( + 1, 1 , .subset2(cyc_prob,"L1_stay"), + 1, 2 , .subset2(cyc_prob,"L1_disc"), + 1, 3 , .subset2(cyc_prob,"L1_next"), + 1, len_m, .subset2(cyc_prob,"L1_death"), + 2, 2 , .subset2(cyc_prob,"L1_stay") + .subset2(cyc_prob,"L1_disc"), + 2, 3 , .subset2(cyc_prob,"L1_next"), + 2, len_m, .subset2(cyc_prob,"L1_death") + ), + nrow = 7, + byrow = TRUE +) +M[vals_1L[,1:2]] <- vals_1L[,3] + + +# Check on first 2 rows: +all(rowSums(M[1:2,])==1) + +# Great, so the above is our process once per cycle to update the matrix before +# running it again. All we now need to do is correctly build the rest of the matrix +# just once + + +# 2L tunnel, staying in the tunnel: +M <- f_vec_in_mat_diag( + A = M, + b = TPs$L2_stay, + start_row = tl_tun1["tun 2L on stay","row"], + start_col = tl_tun1["tun 2L on stay","col"], + direction = "se" +) + +# 2L tunnel: Discontinuation (period of no treatment between lines i.e., 2L-->off trt | 2L) +M <- f_vec_in_mat_diag( + A = M, + b = TPs$L2_disc, + start_row = tl_tun1["tun 2L on stop","row"], + start_col = tl_tun1["tun 2L on stop","col"], + direction = "se" +) +# Move directly to next line (i.e. 2L-->3L | 2L) +M <- f_vec_in_mat_diag( + A = M, + b = TPs$L2_next, + start_row = tl_tun1["tun 2L on next","row"], + start_col = tl_tun1["tun 2L on next","col"], + direction = "se" +) + +# Die within the tunnel +M[tl_tun1["tun 2L on stop","row"]:(tl_tun1["tun 2L on stop","row"] + TH - 1),len_m] <- TPs$L2_death + +f_mat_firstFewCells(M) + +# ok so does it add to 1? + +all(rowSums(M[1:50,])==1) + + +# Brilliant, we've successfully added the first on-treatment tunnel. Now let's add the off-treatment +# tunnel state +M <- f_vec_in_mat_diag( + A = M, + b = 1 - TPs$L2_next - TPs$L2_death, + start_row = tl_tun1["tun 2L off stay","row"], + start_col = tl_tun1["tun 2L off stay","col"], + direction = "se" +) +M <- f_vec_in_mat_diag( + A = M, + b = TPs$L2_next, + start_row = tl_tun1["tun 2L off next","row"], + start_col = tl_tun1["tun 2L off next","col"], + direction = "se" +) +M[tl_tun1["tun 2L off next","row"]:(tl_tun1["tun 2L off next","row"] + TH - 1),len_m] <- TPs$L2_death + + +# Still testing well to 12 dp +all(round(rowSums(M[tl_tun1["tun 2L off next","row"]:(tl_tun1["tun 2L off next","row"]+TH),]),12)==1) + + +# So, we've done it! we've added the first tunnel state representing second-line treatment + + +# Now, let's add the second tunnel state (3L treatment) + + +tl_tun2 <- f_diag_mat_topleftFinder( + tun_n = 2, + TH = TH, + nt = 2 +) + +# 3L on-treatment tunnel: +M <- f_vec_in_mat_diag( + A = M, + b = TPs$L3_stay, + start_row = tl_tun2["tun 3L on stay","row"], + start_col = tl_tun2["tun 3L on stay","col"], + direction = "se" +) +M <- f_vec_in_mat_diag( + A = M, + b = TPs$L3_disc, + start_row = tl_tun2["tun 3L on stop","row"], + start_col = tl_tun2["tun 3L on stop","col"], + direction = "se" +) +M <- f_vec_in_mat_diag( + A = M, + b = TPs$L3_next, + start_row = tl_tun2["tun 3L on next","row"], + start_col = tl_tun2["tun 3L on next","col"], + direction = "se" +) +M[tl_tun2["tun 3L on stop","row"]:(tl_tun2["tun 3L on stop","row"] + TH - 1),len_m] <- TPs$L3_death + +# Off treatment tunnel: +M <- f_vec_in_mat_diag( + A = M, + b = 1 - TPs$L3_next - TPs$L3_death, + start_row = tl_tun2["tun 3L off stay","row"], + start_col = tl_tun2["tun 3L off stay","col"], + direction = "se" +) +M <- f_vec_in_mat_diag( + A = M, + b = TPs$L3_next, + start_row = tl_tun2["tun 3L off next","row"], + start_col = tl_tun2["tun 3L off next","col"], + direction = "se" +) +M[tl_tun2["tun 3L off next","row"]:(tl_tun2["tun 3L off next","row"] + TH - 1),len_m] <- TPs$L3_death + + +all(round(rowSums(M[1:8000,]),12) == 1) + +# Amazing, we've done it! + + + + +# Formalising into a function --------------------------------------------- + + +# The function will have 4 steps: +# +# 1. Calculate the extra columns needed from TPs +# 2. Pre-define the matrix M (with the labs) +# 3. Compute the top left element in M associated with each tunnel +# 4. populate the tunnels for on treatment, off treatment, and BSC +# + + +#' Function to compile a transition probability matrix defining a treatment sequence, +#' where patients move from one treatment to the next, or straight to death, including +#' on and off treatment tunnels +#' +#' WARNING: This function assumes the cycle number is 1. This is because you should not +#' be replacing the TPM M including the whole tunnel each cycle, Instead +#' use this function just once at baseline to create the structure, and then +#' replace the required values for transitions relating to first-line therapy +#' +#' @param tp table of TPs. MUST have 1st column cycle, then disc, next, death sequentially for each line, plus no treatment death (e.g. t L1_disc L1_next L1_death L2_disc...NT_death) +#' @param n_lines Number of treatment lines total, including first line and BSC (for 4 tunnels this is 6 with BSC) +#' @param include_bsc whether to include a tunnel for BSC or skip BSC. If FALSE tp has one less column +#' @param nt non-tunnel rows - almost always 2, but if for example second line is not a tunnel (e.g. relapse and remission model) then it would be 4 and tunnels would start at 3L +#' +#' +f_seq_tpm_compiler <- function(tp, n_lines, include_bsc = TRUE, nt = 2) { + + require(data.table) + require(collapse) + + # Assume cycle = 1, which it should be every time this function is used! + c <- 1 + + # calculate time horizon based on TP table + TH <- nrow(tp) + + target_col <- 1 + (n_lines * 3) + if(include_bsc) target_col <- target_col + 1 + + if(include_bsc==FALSE) stop("Excluding BSC is not supported yet, please contact Darren Burns or Dawn Lee to build it!") + + # Validation - the number of columns of tp should adhere to the documentation above the function + if(ncol(tp) != target_col) stop( + paste0( + "tp should have ", + target_col, + " columns, but it has ", + ncol(tp), + ". Please ensure that the columns are cycle number, 1L discontinuation, 1L next treatment, 1L death, 2L discontinuation ... no-treatment death" + ) + ) + + cat("re-labelling columns in tp\n") + # Right, now we can get on with it. Start by auto-labelling the columns: + colnames(tp) <- c( + "c", + unlist(lapply(1:n_lines, function(tline) { + paste0(paste0("L",tline,"_"),c("disc","next","death")) + })), + "BSC_death" + ) + + # data.tables are easier to work with when lots of rows: + if(class(tp)[1] != "data.table") tp <- as.data.table(tp) + + cat("calculating extra columns in tp\n") + # now we have standardised naming and standardised format, we can calculate + # our new columns: + tp$L1_stay <- 1-rowSums(tp[,list(L1_disc, L1_next, L1_death)]) + tp$L2_stay <- 1-rowSums(tp[,list(L2_disc, L2_next, L2_death)]) + tp$L3_stay <- 1-rowSums(tp[,list(L3_disc, L3_next, L3_death)]) + tp$L4_stay <- 1-rowSums(tp[,list(L4_disc, L4_next, L4_death)]) + tp$BSC_stay <- 1-tp$BSC_death + + # Now we have probability to stay in state as well as probability of + # discontinuation, next line, death, the rest is putting these columns + # or derivitives of them into M + + cat("Generating matrix M\n") + # Part 2: define M (WARNING this uses a lot of RAM) + labs <- c( + "L1_on", + "L1_off", + unlist(lapply(2:n_lines, function(tline) { + c(paste0("L",tline,"_on_c" ,1:TH),paste0("L",tline,"_off_c" ,1:TH)) + })), + paste0("BSC_c" ,1:TH), + "dead" + ) + len_m <- length(labs) + M <- diag(nrow=len_m) + M <- recode_num(M,`1`=0) + M[len_m,len_m] <- 1 + + cat("Finding diagonal start points\n") + # Part 3: compute top left elements + tl_elements <- do.call( + rbind, + lapply(1:(n_lines-1), function(i_tun) { + f_diag_mat_topleftFinder2( + tun_n = i_tun, + TH = TH, + nt = 2 + ) + }) + ) + + cat("Generating co-ordinates for entry into M\n") + # Add in BSC. patients coming out of the last line of therapy go in here if they're not dead, + # so it's the same row, next column as the end of the lad + tl_elements <- rbind( + tl_elements, + matrix( + data = c(max(tl_elements[, "col"]), max(tl_elements[, "col"]) + 1), + nrow = 1, + dimnames = list("BSC_stay", c("row", "col")) + ) + ) + + # Part 4: populate the matrix: + # + # Right, now we have all the data, all the starting points, and the matrix + # to put it all into. We can proceed to populate. Let's start with first line + # as it's different from the others. + cyc_prob <- as.list(tp[1,]) + vals_1L <- matrix( + c( + 1, 1 , .subset2(cyc_prob,"L1_stay"), + 1, 2 , .subset2(cyc_prob,"L1_disc"), + 1, 3 , .subset2(cyc_prob,"L1_next"), + 1, len_m, .subset2(cyc_prob,"L1_death"), + 2, 2 , .subset2(cyc_prob,"L1_stay") + .subset2(cyc_prob,"L1_disc"), + 2, 3 , .subset2(cyc_prob,"L1_next"), + 2, len_m, .subset2(cyc_prob,"L1_death") + ), + nrow = 7, + byrow = TRUE + ) + M[vals_1L[,1:2]] <- vals_1L[,3] + + + # Note that we made all of our replacements in one go by making a matrix + # of matrix co-ordinates and values. If we do the same for all of our values + # we can enter all of the data needed in one fell swoop. We're therefore going + # to take the same approach as above, but on a bigger scale! + # + # To do that, we're going to do what we did in the definition of our function + # f_vec_in_mat_diag - make an index, and then compile our co-ordinate matrix + # using one row of tl_elements at a time along with the paired data from tp. + # + # The index will always be 0 to TH-1, and will simply be added to the start + # row and start column to identify where to put the data. + + indx <- 0:(TH - 1) + + co_ordinate_matrix <- lapply(1:(n_lines-1), function(line_n) { + # Identify the starting points for this line: + tl_tun <- f_diag_mat_topleftFinder( + tun_n = line_n, + TH = TH, + nt = nt + ) + + # Make an id for this line which is used to put the right numbers in the right place + tline <- paste0("L",line_n) + + # Pull out the columns to put as the data to enter in M + line_tp <- as.matrix(tp)[,which(grepl(tline,colnames(tp)))] + p_stay <- line_tp[,which(grepl("stay",colnames(line_tp)))] + p_disc <- line_tp[,which(grepl("disc",colnames(line_tp)))] + p_next <- line_tp[,which(grepl("next",colnames(line_tp)))] + p_death <- line_tp[,which(grepl("death",colnames(line_tp)))] + + # make co-ordinate values and value values for the on and off treatment tunnels including + # death for this treatment line: + co_list <- list( + # on-treatment tunnel: + co_on_stay = matrix(c(tl_tun[1,"row"] + indx, tl_tun[1,"col"] + indx,p_stay) ,ncol=3), + co_on_disc = matrix(c(tl_tun[2,"row"] + indx, tl_tun[2,"col"] + indx,p_disc) ,ncol=3), + co_on_next = matrix(c(tl_tun[3,"row"] + indx, tl_tun[3,"col"] + indx,p_next) ,ncol=3), + co_on_death = matrix(c(tl_tun[1,"row"] + indx, rep(len_m,TH) ,p_death),ncol=3), + + # Off treatment tunnel for this line: + co_off_stay = matrix(c(tl_tun[4,"row"] + indx, tl_tun[4,"col"] + indx,1 - p_death - p_next),ncol=3), + co_off_next = matrix(c(tl_tun[5,"row"] + indx, tl_tun[5,"col"] + indx,p_next ),ncol=3), + co_off_death = matrix(c(tl_tun[4,"row"] + indx, rep(len_m,TH) ,p_death ),ncol=3) + ) + + # Bind it together into one larger matrix and return that + return(do.call(rbind,co_list)) + }) + + # co_ordinate_matrix now has all of the data organised except for the BSC tunnel. + # We add that here: + + bsc_start_point <- f_diag_mat_topleftFinder( + tun_n = n_lines - 1, + TH = TH, + nt = nt + )[5,"col"] + co_bsc <- rbind( + matrix(c(bsc_start_point + indx, bsc_start_point + 1 + indx,tp$BSC_stay) ,ncol=3), + matrix(c(bsc_start_point + indx, rep(len_m,TH) ,tp$BSC_death) ,ncol=3) + ) + + # bind it all together into one massive matrix of co-ordinates and values: + co_ordinate_matrix <- rbind(do.call(rbind,co_ordinate_matrix),co_bsc) + + cat("Inserting values\n") + # Put all values into the matrix in one vectorised command: + M[co_ordinate_matrix[,1:2]] <- co_ordinate_matrix[,3] + + # The last cycle of BSC is an issue, stay probability isn't entered. + M[len_m-1,len_m-1] <- 1-M[len_m-1,len_m] + + # Well all rows sum to 1 so that's promising: + # all(round(rowSums(M),10) == 1) # reads true: + return(M) + +} + +TPs <- openxlsx::read.xlsx("./1_Data/TP matrix expansion toy example.xlsm",sheet = "Sheet3") +TPs <- as.data.table(TPs) +colnames(TPs) <- c("c", unlist(lapply(1:4, function(x){paste0("L",x,c("_disc", "_next", "_death"))})),"NT_death") + + + +M <- f_seq_tpm_compiler( + tp = TPs, + n_lines = 4, + include_bsc = TRUE, + nt = 2 +) + +# Test that all rows sum to 1 each: +all(round(rowSums(M),10) == 1) + +# Generating a trace ------------------------------------------------------ + +# Now that we have M for cycle 1 we can follow this process: +# +# 1. update 1L transitions in M +# 2. p_(t-1) %*% M +# +# That's it! + +tr <- matrix( + ncol = dim(M)[2], + nrow = TH +) +tr[is.na(tr)] <- 0 +tr[1,1] <- 1 + + +# names(bl_pop) <- dimnames(M)[[2]] + +TPs$L1_stay <- 1-rowSums(TPs[,list(L1_disc, L1_next, L1_death)]) + + +TRACE <- Reduce( + x = 1:10, + init = tr, + accumulate = FALSE, + f = function(prev, c) { + if (c == 1) { + # If it's the first cycle, we've calculated M already so just multiply it + cat("cycle 1\n") + prev[2,] <- as.numeric(prev[1,] %*% M) + return(prev) + } else { + # It's a subsequent cycle, replace the necessary cells for first line, everything else stays the same: + cat(paste0("cycle ",c,"\n")) + cyc_prob <- as.list(TPs[c,]) + vals_1L <- matrix( + c( + 1, 1 , .subset2(cyc_prob,"L1_stay"), + 1, 2 , .subset2(cyc_prob,"L1_disc"), + 1, 3 , .subset2(cyc_prob,"L1_next"), + 1, len_m, .subset2(cyc_prob,"L1_death"), + 2, 2 , .subset2(cyc_prob,"L1_stay") + .subset2(cyc_prob,"L1_disc"), + 2, 3 , .subset2(cyc_prob,"L1_next"), + 2, len_m, .subset2(cyc_prob,"L1_death") + ), + nrow = 7, + byrow = TRUE + ) + M[vals_1L[,1:2]] <- vals_1L[,3] + + prev[c+1,] <- as.numeric(prev[c,] %*% M) + return(prev) + } + } +) + +# HAHAHAHA we did it Dawn! It's slow but it's correct I think, at least for the first few cycles! +rowSums(TRACE[1:10,]) +f_mat_firstFewCells(TRACE) + + +# now, this is unacceptably slow, so we need to figure out how to optimise it a bit: + +library(profvis) + +# The assignment of values to a large matrix is the problem: + +microbenchmark::microbenchmark( + get_value = {M[vals_1L[,1:2]]}, + set_value = {M[vals_1L[,1:2]] <- vals_1L[,3]}, + times = 1000 +) + +# To prove it: + + +profvis({ + TRACE <- Reduce( + x = 1:10, + init = tr, + accumulate = FALSE, + f = function(prev, c) { + if (c == 1) { + # If it's the first cycle, we've calculated M already so just multiply it + cat("cycle 1\n") + prev[2,] <- as.numeric(prev[1,] %*% M) + return(prev) + } else { + # It's a subsequent cycle, replace the necessary cells for first line, everything else stays the same: + cat(paste0("cycle ",c,"\n")) + cyc_prob <- as.list(TPs[c,]) + vals_1L <- matrix( + c( + 1, 1 , .subset2(cyc_prob,"L1_stay"), + 1, 2 , .subset2(cyc_prob,"L1_disc"), + 1, 3 , .subset2(cyc_prob,"L1_next"), + 1, len_m, .subset2(cyc_prob,"L1_death"), + 2, 2 , .subset2(cyc_prob,"L1_stay") + .subset2(cyc_prob,"L1_disc"), + 2, 3 , .subset2(cyc_prob,"L1_next"), + 2, len_m, .subset2(cyc_prob,"L1_death") + ), + nrow = 7, + byrow = TRUE + ) + M[vals_1L[,1:2]] <- vals_1L[,3] + + prev[c+1,] <- as.numeric(prev[c,] %*% M) + return(prev) + } + } + ) +}) + +# So, 200/280 ms spent assigning values to M, even though it's only a few. This is because +# R copy and pastes the ENTIRE 1.6GB matrix to do this (See R documentation for proof of this) +# +# link: https://cran.r-project.org/doc/manuals/r-release/R-lang.html#Subset-assignment +# also: https://github.com/r-lib/R6/issues/201 +# +# Now, there is a package called bigalgebra, which stores the matrix in a reference +# style object, meaning making changes to that object will not copy paste it, but will +# change the values directly on the disc or in RAM. that's what we need here. +# +# This package also has the ability to perform matrix multiplication, but ONLY on +# to +# + +M <- as.big.matrix(M) + +# tr[[1]] %*% M now doesn't work, but bigalgebra has a function for it + +tr2 <- list(as.big.matrix(matrix(tr[1,],nrow = 1))) + +# We would do bigalgebra::dgemm(A=tr[[1]],B=M) to matrix multiply the vector +# of state occupancy in t-1 by the matrix M which has been updated to incorporate +# this cycle's first-line transitions + +i <- list(tr=tr2,M=M) + +profvis({ + TRACE <- Reduce( + x = 1:10, + init = i, + accumulate = FALSE, + f = function(prev, c) { + if (c == 1) { + # If it's the first cycle, we've calculated M already so just multiply it + cat("cycle 1\n") + prev$tr[[2]] <- bigalgebra::dgemm(A = prev$tr[[1]],B = prev$M) + return(prev) + } else { + # It's a subsequent cycle, replace the necessary cells for first line, everything else stays the same: + cat(paste0("cycle ",c,"\n")) + cyc_prob <- as.list(TPs[c,]) + vals_1L <- matrix( + c( + 1, 1 , .subset2(cyc_prob,"L1_stay"), + 1, 2 , .subset2(cyc_prob,"L1_disc"), + 1, 3 , .subset2(cyc_prob,"L1_next"), + 1, len_m, .subset2(cyc_prob,"L1_death"), + 2, 2 , .subset2(cyc_prob,"L1_stay") + .subset2(cyc_prob,"L1_disc"), + 2, 3 , .subset2(cyc_prob,"L1_next"), + 2, len_m, .subset2(cyc_prob,"L1_death") + ), + nrow = 7, + byrow = TRUE + ) + + # Assign values to the big.matrix which ONLY changes those values and + # doesn't copy paste the matrix to do it. + + prev$M[vals_1L[,1:2]] <- vals_1L[,3] + + prev$tr[[c+1]] <- bigalgebra::dgemm(A = prev$tr[[c]],B = prev$M) + return(prev) + } + } + ) +}) + + +TRACE$tr <- do.call(rbind,lapply(TRACE$tr,bigmemory::as.matrix)) + +rowSums(TRACE$tr) +f_mat_firstFewCells(TRACE$tr) + +# Great, so there's a big computational gain, I think about 5x quicker on my computer. +# +# Now let's have a look at just how long this is going to take to compute using 50 cycles +# as a starting point: +# + +t1 <- Sys.time() + +tr3 <- tr2[[1]] + +# reset M +cyc_prob <- as.list(TPs[1,]) +vals_1L <- matrix( + c( + 1, 1 , .subset2(cyc_prob,"L1_stay"), + 1, 2 , .subset2(cyc_prob,"L1_disc"), + 1, 3 , .subset2(cyc_prob,"L1_next"), + 1, len_m, .subset2(cyc_prob,"L1_death"), + 2, 2 , .subset2(cyc_prob,"L1_stay") + .subset2(cyc_prob,"L1_disc"), + 2, 3 , .subset2(cyc_prob,"L1_next"), + 2, len_m, .subset2(cyc_prob,"L1_death") + ), + nrow = 7, + byrow = TRUE +) +M[vals_1L[,1:2]] <- vals_1L[,3] + +f_mat_firstFewCells(M) + +# See what +TRACE <- Reduce( + x = 1:50, + init = tr3, + accumulate = TRUE, + f = function(prev, c) { + cat(paste0("cycle ",c,"\n")) + cyc_prob <- as.list(TPs[c,]) + vals_1L <- matrix( + c( + 1, 1 , .subset2(cyc_prob,"L1_stay"), + 1, 2 , .subset2(cyc_prob,"L1_disc"), + 1, 3 , .subset2(cyc_prob,"L1_next"), + 1, len_m, .subset2(cyc_prob,"L1_death"), + 2, 2 , .subset2(cyc_prob,"L1_stay") + .subset2(cyc_prob,"L1_disc"), + 2, 3 , .subset2(cyc_prob,"L1_next"), + 2, len_m, .subset2(cyc_prob,"L1_death") + ), + nrow = 7, + byrow = TRUE + ) + M[vals_1L[,1:2]] <- vals_1L[,3] + return(bigalgebra::dgemm(A = prev,B = M)) + } +) + + +t2 <- Sys.time() + +print(t2 - t1) + +t50 <- as.numeric(t2 - t1) + +# Running one treatment line with this method will take approximately this much time: +t_th_mins <- ((t50 / 50) * TH) / 60 +print(paste0("Matrix method - one trace for one treatment line will take approximately ", round(t_th_mins,2), " minutes to run...")) + +TRACE <- do.call(rbind,lapply(TRACE,bigmemory::as.matrix)) +f_mat_firstFewCells(TRACE) +round(rowSums(TRACE),12) == 1 + + +# Hmm...that's a long time! This might be one of those cases where it's actually +# better to use a for loop: + +tr4 <- tr2 + +t3 <- Sys.time() +for (c in 1:50) { + cat(paste0("cycle ",c,"\n")) + cyc_prob <- as.list(TPs[c,]) + vals_1L <- matrix( + c( + 1, 1 , .subset2(cyc_prob,"L1_stay"), + 1, 2 , .subset2(cyc_prob,"L1_disc"), + 1, 3 , .subset2(cyc_prob,"L1_next"), + 1, len_m, .subset2(cyc_prob,"L1_death"), + 2, 2 , .subset2(cyc_prob,"L1_stay") + .subset2(cyc_prob,"L1_disc"), + 2, 3 , .subset2(cyc_prob,"L1_next"), + 2, len_m, .subset2(cyc_prob,"L1_death") + ), + nrow = 7, + byrow = TRUE + ) + M[vals_1L[,1:2]] <- vals_1L[,3] + tr4[[c+1]] <- bigalgebra::dgemm(A = tr4[[c]],B = M) +} +t4 <- Sys.time() + +t_th_mins <- ((as.numeric(t4 - t3) / 50) * TH) / 60 +print(paste0("Matrix method for loop - one trace for one treatment line will take approximately ", round(t_th_mins,2), " minutes to run...")) + +tr4 <- do.call(rbind,lapply(tr4,bigmemory::as.matrix)) +f_mat_firstFewCells(tr4) +round(rowSums(tr4),12) == 1 + + + +# Sparse matrices --------------------------------------------------------- + +# I have since learned that in R you can have sparse matrices if you use the Matrix package + +library(Matrix) + +M <- Matrix( + as.matrix(M), + sparse = TRUE +) + + +pb <- txtProgressBar( + min = 1, + max = TH-1, + initial = 1, + width = 50, + style = 3, + char = "=" +) + +out_list <- Reduce( + x = 1:(TH-1), + init = list( + tpm = M, + p = list(Matrix(tr[1,],nrow = 1,sparse = T)) + ), + accumulate = FALSE, + f = function(prev,c) { + setTxtProgressBar(pb, c) + cyc_prob <- as.list(TPs[c,]) + vals_1L <- matrix( + c( + 1, 1 , .subset2(cyc_prob,"L1_stay"), + 1, 2 , .subset2(cyc_prob,"L1_disc"), + 1, 3 , .subset2(cyc_prob,"L1_next"), + 1, len_m, .subset2(cyc_prob,"L1_death"), + 2, 2 , .subset2(cyc_prob,"L1_stay") + .subset2(cyc_prob,"L1_disc"), + 2, 3 , .subset2(cyc_prob,"L1_next"), + 2, len_m, .subset2(cyc_prob,"L1_death") + ), + nrow = 7, + byrow = TRUE + ) + prev$tpm[vals_1L[,1:2]] <- vals_1L[,3] + + prev$p[[c+1]] <- prev$p[[c]] %*% prev$tpm + + return(prev) + + } +) + +close(pb) + +# To get the trace: +sparse_trace <- matrix(unlist(lapply(out_list$p,as.numeric),use.names = F),nrow = TH,byrow = TRUE) +f_mat_firstFewCells(sparse_trace) +rowSums(sparse_trace) + diff --git a/2_Scripts/standalone scripts/markov modelling/simple_markov.R b/2_Scripts/standalone scripts/markov modelling/simple_markov.R new file mode 100644 index 0000000..dc26dc5 --- /dev/null +++ b/2_Scripts/standalone scripts/markov modelling/simple_markov.R @@ -0,0 +1,187 @@ + +# this script is designed to run through the progression of more and more complicated +# Markov models, to ultimately arrive at what we need for this project (the most complex case). +# This final case is: +# +# Extended Markov model - time-varying transitions and multiple time-varying tunnel states of time-varying duration +# +# - transition probability matrix for each model cycle to represent MSM results (???) +# - Including multiple tunnel states between the main health states (treatment lines) +# - Tunnel states change length per model cycle so sometimes its 2 cycles, sometimes its 3 kind of thing +# - depends on the difference between time to next treatment (TTNTx) and time to discontinuation (TTDisc) +# - both of these curves change over time per extrapolations, so tunnel changes length every cycle +# - possibility that mortality inside tunnel is dependent on multiple things: +# - The time that the cohort slice entered the tunnel (i.e., t_enter) +# - The time inside the tunnel given t_enter (t_tun | t_enter) +# - Exiting tunnel is only possible for 2 reasons +# - Death per (OS | t_tun, t_enter) +# - Made it to the end of the tunnel and go to the next treatment line +# +# + + +# Libraries --------------------------------------------------------------- + +library(tidyverse) +library(data.table) +library(gtools) + +# Simplest Markov model --------------------------------------------------- + +# Simply repeatedly multiplying vector by matrix: + +p <- c(1,0,0) + +tpm <- matrix( + c(0.99, 0.005 , 0.005, + 0.001, 0.9 , 0.099, + 0 , 0 , 1), + nrow = 3, + byrow = TRUE +) + +res_as_list <- Reduce( + x = 1:1000, + init = p, + accumulate = TRUE, + f = function(previous_cycle_result, cycle_number) { + previous_cycle_result %*% tpm + } +) + +res <- do.call(rbind,res_as_list) + +# This is the trace, showing the proportion of people in each state at each time: +res + +colnames(res) <- c("sick", "sicker", "dead") + + +res <- as.data.table(res) + +res$cycle <- 0:(dim(res)[1]-1) + +plot_res <- melt(res,id.vars = "cycle",variable.name = "state",value.name = "pop") + + + +p1 <- ggplot(plot_res,aes(x = cycle, y = pop, colour = state)) + + geom_line() + + theme_classic() + + theme(legend.position = "bottom") + + +# Time-varying tpms ------------------------------------------------------- + +# Just for an example, make some probabilistic draws + +row_1 <- gtools::rdirichlet(1000,round(c(0.99 , 0.005, 0.005)*1000)) +row_2 <- gtools::rdirichlet(1000,round(c(0.001, 0.9 , 0.099)*1000)) +row_3 <- c(0,0,1) + +tpm <- lapply(1:1000, function(cyc) { + matrix( + c( + row_1[cyc,], + row_2[cyc,], + row_3 + ), + nrow = 3, + byrow = TRUE + ) +}) + + +res_as_list <- Reduce( + x = 1:1000, + init = p, + accumulate = TRUE, + f = function(previous_cycle_result, cycle_number) { + previous_cycle_result %*% tpm[[cycle_number]] + } +) + +res <- do.call(rbind,res_as_list) + +# This is the trace, showing the proportion of people in each state at each time: +res + +colnames(res) <- c("sick", "sicker", "dead") + + +res <- as.data.table(res) + +res$cycle <- 0:(dim(res)[1]-1) + +plot_res <- melt(res,id.vars = "cycle",variable.name = "state",value.name = "pop") + + + +p2 <- ggplot(plot_res,aes(x = cycle, y = pop, colour = state)) + + geom_line() + + theme_classic() + + theme(legend.position = "bottom") + + + +# Fixed-time TPMs --------------------------------------------------------- + +tpm <- list( + matrix( + c(0.99, 0.005 , 0.005, + 0.001, 0.9 , 0.099, + 0 , 0 , 1), + nrow = 3, + byrow = TRUE + ), + matrix( + c(0.98, 0.015 , 0.005, + 0.001, 0.95 , 0.049, + 0 , 0 , 1), + nrow = 3, + byrow = TRUE + ) +) + +switch_time <- 25 + + +res_as_list <- Reduce( + x = 1:1000, + init = p, + accumulate = TRUE, + f = function(previous_cycle_result, cycle_number) { + + if(cycle_number < switch_time) { + previous_cycle_result %*% tpm[[1]] + } else { + previous_cycle_result %*% tpm[[2]] + } + } +) + +res <- do.call(rbind,res_as_list) + +# This is the trace, showing the proportion of people in each state at each time: +res + +colnames(res) <- c("sick", "sicker", "dead") + + +res <- as.data.table(res) + +res$cycle <- 0:(dim(res)[1]-1) + +plot_res <- melt(res,id.vars = "cycle",variable.name = "state",value.name = "pop") + + + +p3 <- ggplot(plot_res,aes(x = cycle, y = pop, colour = state)) + + geom_line() + + theme_classic() + + theme(legend.position = "bottom") + + +p1 +p2 +p3 diff --git a/2_Scripts/standalone scripts/patient flow examples/patient_flow.R b/2_Scripts/standalone scripts/patient flow examples/patient_flow.R new file mode 100644 index 0000000..eee1f06 --- /dev/null +++ b/2_Scripts/standalone scripts/patient flow examples/patient_flow.R @@ -0,0 +1,201 @@ + +# This script is an example of how the patient flow sheet calculations will take place +# There will be one for each treatment pathway (as these are the states of the world +# to be compared to each other). The result will be one 2-dimensional numeric matrix +# per treatment pathway. Selected values from the colSums() of this matrix will be +# the results that feed into results calculations, and also all the necessary breakdowns +# of costs, LYs, QALYs, doses received and so on, so this matrix should be quite +# exhaustive and should think about the future (i.e. what tables will be needed, +# yes, but also what tables will be needed in the future too) + +# A THROWAWAY EXAMPLE of ONE p - one iteration of one scenario of the parameters +# list. obviously this will all come from Excel extraction +require(data.table) +p <- list( + basic = list( + TH_yr = 40, + cl_yr = 28 / 365.25, + cl_mo = 1 / 12, + cl_we = 28 / 7, + cl_da = 28, + disc_c = 0.035, + disc_q = 0.035, + TH = ceiling((365.25/28) * 40), + t_yr = (seq(0,ceiling((365.25/28) * 40)) * (28 / 365.25))[1:ceiling((365.25/28) * 40)], + drug_names = c("drug 1", "drug 2") + ), + path = list( + pathways = 1:2, + labels = c("Drug 1 only", "Drug 1 followed by Drug 2") + ), + drug = list( + rel_by_pathway = list("drug 1", c("drug 1", "drug 2")), + dosing_by_drug = list( + list( + name = "drug 1", + form = "IV", + basis = "mg/kg", + dose = 5, + dur = NULL, + titr = NULL, + wean = NULL + ), + list( + name = "drug 2", + form = "oral", + basis = NULL, + dose = 3, + dur = 26, + titr = list( + c(dose = 1, dur = 4), + c(dose = 2, dur = 8) + ), + wean = NULL + ) + ), + final_cost = list( + 50, + 100 + ) + ) +) + +names(p$basic$drug_names) <- p$basic$drug_names +names(p$drug$dosing_by_drug) <- p$basic$drug_names +names(p$drug$final_cost) <- p$basic$drug_names + +# Discount factors. Discounting is done after +p$basic$dfac_c <- 1/((1+p$basic$disc_c) ^ p$basic$t_yr) +p$basic$dfac_q <- 1/((1+p$basic$disc_q) ^ p$basic$t_yr) + + + +# make a list of results r to house...the results + +r <- list( + det = list(), + psa = list(), + scen = list(), + owsa = list(), + evppi = list() +) + +# FOR AN EXAMPLE - calculate the patient flow sheet of the deterministic base-case +# analysis using the input list p. This list should contain EVERYTHING +# needed to run the results. DO NOT refer to things outside of +# p whilst inside this Reduce() call. This is the most central +# aspect of the CE model and MUST be as transparent as possible. + +r$det$pf <- lapply(p$path$pathways, function(pathway) { + + + # Make the top row of the patient flow sheet for this treatment pathway. This + # varies only in some of the top row values. Drug costs and such will have + # different top row values per relevant drugs per line etc. In this simplified + # example, we've got drug 1 and drug 2. only drug 1 in pathway 1, drug 1 and + # drug 2 in pathway 2. + + top_row <- list( + cycle = 0, + t_yr = 0, + t_mo = 0, + t_we = 0, + t_da = 0, + dfac_c = 1, + dfac_q = 1 + ) + + # Drugs that are relevant in this treatment pathway + dr_nam <- p$basic$drug_names + + # retrieve a vector of the relevant drugs for this treatment pathway + relevant_drugs <- p$drug$rel_by_pathway[[pathway]] + + # To check which pathway we're currently in: + # which_pathway <- p$path$labels[pathway] + + # retrieve the dosing information for the relevant drugs for this treatment pathway + # Note one square bracket allows subsetting of a list for multiple entries + dosing_info <- p$drug$dosing_by_drug[relevant_drugs] + + # Add in 0s for ALL POSSIBLE DRUGS. This then ensures that all patient flow sheets + # for all treatment pathways have the same columns, allowing one function + # to summarise them all. + top_row[paste0("d_cost_undisc_",p$basic$drug_names)] <- 0 + top_row[paste0("d_cost_disc_" ,p$basic$drug_names)] <- 0 + + # now, only for the relevant drugs, add in their drug cost in the first cycle: + top_row[paste0("d_cost_undisc_",relevant_drugs)] <- unlist(p$drug$final_cost[relevant_drugs]) + + pf_cycle_list <- Reduce( + accumulate = TRUE, + x = 1:p$basic$TH, + init = as.list(top_row), + f = function(previous_cycle, cycle_number) { + + # calculate the values for the next cycle using + # - cycle_number: the current cycle number - note x goes from 1:TH not 0:TH as top_row is cycle 0 + # - previous_cycle: the result of reduce for the previous cycle + # - p: a complete parameters list for this scenario of the model. + # - This should contain ALL inputs required to calculate the patient flow + # - p is specific to this iteration for this structural scenario + # - p should be substituted for e.g. PSA[[1]] or OWSA$ub[[this_parameter]] to run new PF sheets + # - NOTHING AT ALL from outside should be used here, except for functions. + # - All elements in top_row are named so that it is clear what data is used + # - All elements in top_row AND previous_cycle must have length 1 + # - functions to calculate this_cycle given p, previous_cycle, and cycle_number + + # start from the values from the previous cycle + prev <- as.list(previous_cycle) + this <- prev + + # Calculate values for this cycle - start from the basics: + this$cycle <- prev$cycle + 1 + this$t_yr <- prev$t_yr + p$basic$cl_yr + this$t_mo <- prev$t_mo + p$basic$cl_mo + this$t_we <- prev$t_we + p$basic$cl_we + this$t_da <- prev$t_da + p$basic$cl_da + + # We can use dosing_info as an argument to a function whcih calculates + # consumption of each drug in our cycle length period given the point in + # time and the potential rules, RDIs and so on. + + # assign the relevant drug costs to the relevant columns + this[paste0("d_cost_undisc_",relevant_drugs)] <- p$drug$final_cost[relevant_drugs] + + # Note we do not perform discounting here as it's inefficient. instead + # take the discount factors e.g., p$basic$dfac_c and apply them at the end! + + # note, when using reduce you can refer to objects in previous_cycle, which + # gets around the biggest pitfall of lapply vs for, which is referring to + # previous iterations. + + # ... you get the idea, repeat this stuff for every column in the PFS sheet + + # once the cylce is calculated, unlist the results. this puts it back into + # a named vector, so that at the end, they can all be rbound together into + # one numeric matrix! + as.data.table(this) + } + ) + + # This sticks them together row wise and then separates them column wise! + res <- as.list(rbindlist(pf_cycle_list)[1:p$basic$TH]) + + res$dfac_c <- p$basic$dfac_c + res$dfac_q <- p$basic$dfac_q + + res[paste0("d_cost_disc_",p$basic$drug_names)] <- lapply(res[paste0("d_cost_undisc_",p$basic$drug_names)], function(x) x * res$dfac_c) + + # Before returning this treatment pathway, bind the results together row wise, or + # get a huge computational gain by defining it as a matrix. This results + # in a 2d matrix for each treatment pathway which is not restricted by size, labelling, + # number of treatment pathways, approaches to calculating columns, underlying settings + # in p, whether the analysis is probabilistic or deterministic, whether it's a + # scenario analysis or a base-case analysis. + + return(as.data.table(res)) + +}) + +r$det$pf diff --git a/2_Scripts/standalone scripts/results processing/combine-psa-results.R b/2_Scripts/standalone scripts/results processing/combine-psa-results.R new file mode 100644 index 0000000..3282198 --- /dev/null +++ b/2_Scripts/standalone scripts/results processing/combine-psa-results.R @@ -0,0 +1,26 @@ +library(data.table) + +PAS_price_rds_files <- dir(path = "./4_Output/PSA-PAS-price", pattern = "PSA_output.*\\.rds") +List_price_rds_files <- dir(path = "./4_Output/PSA-list-price", pattern = "PSA_output.*\\.rds") + +PSA_results_PAS_price <- PAS_price_rds_files |> + lapply(function(filename) file.path("./4_Output/PSA-PAS-price", filename)) |> + lapply(readRDS) |> + rbindlist() + +setorder(PSA_results_PAS_price, iteration, oo_pop, trt_n) +PSA_results_PAS_price[, dd_drug_price_options := "PAS price"] + +PSA_results_List_price <- List_price_rds_files |> + lapply(function(filename) file.path("./4_Output/PSA-list-price", filename)) |> + lapply(readRDS) |> + rbindlist() + +setorder(PSA_results_List_price, iteration, oo_pop, trt_n) +PSA_results_List_price[, dd_drug_price_options := "List price"] + +combined_results <- rbind(PSA_results_PAS_price, PSA_results_List_price) + +combined_results[, total_costs := mol_0 + mol_1 + mol_2 + mol_3 + mol_4 + mol_5 + mol_6 + mol_7 + mol_8 + mol_9 + mol_10 + mol_11 + mol_12 + mol_999 + other_costs] + +saveRDS(combined_results, "./4_Output/PSA-combined.rds") diff --git a/2_Scripts/standalone scripts/results processing/extracting results tables.R b/2_Scripts/standalone scripts/results processing/extracting results tables.R new file mode 100644 index 0000000..6d0c9c1 --- /dev/null +++ b/2_Scripts/standalone scripts/results processing/extracting results tables.R @@ -0,0 +1,689 @@ +#Ed's code for extracting results tables + + + +# res <- readRDS(rstudioapi::selectFile( +# path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files", +# existing = TRUE)) + +#to get total cost, QALYS and LY: + +dir.create("4_Output/tables_for_report_PAS_TO BE DELETED/basecase") +for (n in 1:3) { + summary <- f_res_sum_weighted_model( + rd = res$weighted_model_disc[[n]], + rud = res$weighted_model_undisc[[n]] + ) + summary$drug <- i$r_pld_lookup_mol$Description[match(summary$L1, i$r_pld_lookup_mol$Number)] + summary <- summary[,c(1,5,2,4,3)] + summary <- summary[order(summary$costs),] + summary[,c("ly","qalys")] <- round(summary[,c("ly","qalys")],3) + write.csv(summary, file = paste0("4_Output/tables_for_report_PAS_TO BE DELETED/basecase/summary",n,".csv"), row.names = F) + + #to get incremental analyses (note excludes dominated): + nondom <- res$weighted_incremental[[n]]$non_dominated + nondom$drug <- i$r_pld_lookup_mol$Description[match(nondom$L1, i$r_pld_lookup_mol$Number)] + nondom <- nondom[,c(1,9,3,2,4,5,7,6,8)] + nondom[,c(4:5,7:8)] <- round(nondom[,c(4:5,7:8)],3) + nondom[,c(3,6,9)] <- round(nondom[,c(3,6,9)],2) + write.csv(nondom, file = paste0("4_Output/tables_for_report_PAS_TO BE DELETED/basecase/nondom",n,".csv"), row.names = F) + + LY <- res$weighted_model_undisc[[n]][,c(1,45:53)] + LY$drug <- i$r_pld_lookup_mol$Description[match(LY$L1, i$r_pld_lookup_mol$Number)] + LY <- LY[,c(1,11,2:3, 5:10, 4)] + LY <- t(LY) + write.csv(LY, file = paste0("4_Output/tables_for_report_PAS_TO BE DELETED/basecase/LY",n,".csv"), row.names = T) + + QALY <- res$weighted_model_disc[[n]][,c(1,28:44)] + QALY$drug <- i$r_pld_lookup_mol$Description[match(QALY$L1, i$r_pld_lookup_mol$Number)] + QALY <- as.data.frame(QALY) + for (m in seq(2,14,4)) { + QALY[,m] <- QALY[,m] + QALY[,(m+1)] + } + QALY <- t(QALY[,c(1,19,seq(2,17,2),18)]) + write.csv(QALY, file = paste0("4_Output/tables_for_report_PAS_TO BE DELETED/basecase/QALY",n,".csv"), row.names = T) + + cost <- res$weighted_model_disc[[n]][,c(1:27)] + cost$drug <- i$r_pld_lookup_mol$Description[match(cost$L1, i$r_pld_lookup_mol$Number)] + cost <- cost[,c(1,28,2:27)] + cost$substx_drug <- apply(cost[,c("drug_L2","drug_L3","drug_L4","drug_L5")],1,sum) + cost <- cost[,-c("drug_L2","drug_L3","drug_L4","drug_L5")] + cost$substx_admin <- apply(cost[,c("admin_L2","admin_L3","admin_L4","admin_L5")],1,sum) + cost <- cost[,-c("admin_L2","admin_L3","admin_L4","admin_L5")] + cost$substx_ae <- apply(cost[,c("ae_cost_L2","ae_cost_L3","ae_cost_L4","ae_cost_L5")],1,sum) + cost <- cost[,-c("ae_cost_L2","ae_cost_L3","ae_cost_L4","ae_cost_L5")] + cost$mru_L1 <- apply(cost[,c("mru_on_L1","mru_off_L1")],1,sum) + cost <- cost[,-c("mru_on_L1","mru_off_L1")] + cost$mru_substx <- apply(cost[,c("mru_on_L2","mru_off_L2", + "mru_on_L3","mru_off_L3", + "mru_on_L4","mru_off_L4", + "mru_on_L5","mru_off_L5")],1,sum) + cost <- cost[,-c("mru_on_L2","mru_off_L2", + "mru_on_L3","mru_off_L3", + "mru_on_L4","mru_off_L4", + "mru_on_L5","mru_off_L5")] + cost <- cost[,c(1:5,7:11,6)] + cost$total <- apply(cost[,3:11],1,sum) + cost <- cost[order(cost$total),] + write.csv(cost, file = paste0("4_Output/tables_for_report_PAS_TO BE DELETED/basecase/cost",n,".csv"), row.names = F) + +} + +dir.create("4_Output/tables_for_report_PAS_TO BE DELETED/scenarios") + +rds_files <- list.files("E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files", + full.names = TRUE) + +rds_files <- rds_files[grepl("\\.rds", rds_files)] +rds_files <- rds_files[grepl("Scenario", rds_files)] + + + + +# for (n in 1:length(rds_files)) { +# res <- readRDS(rds_files[n]) +# +# summary <- res$mk$wa_summarised[1:3] +# +# summary <- lapply(summary, function(x) { +# x$drug <- i$r_pld_lookup_mol$Description[match(x$L1, i$r_pld_lookup_mol$Number)] +# x <- x[,c(1,5,2:4)] +# x <- x[order(x$costs)] +# return(x)}) +# } +scenarios <- list(pop_1 = data.table(), + pop_2 = data.table(), + pop_3 = data.table()) + +for (n in 1:length(rds_files)) { + res <- readRDS(rds_files[n]) + + scenario <- substring(rds_files[n], gregexpr("Scenario", rds_files[n])[[1]][1], (gregexpr("\\.rds", rds_files[n])[[1]][1]-1)) + + non_dom <- lapply(res$mk$weighted_incremental[1:3], function(x) x$non_dominated) + + if (length(non_dom) == 0) { + warning("No data recorded for ", scenario) + next + } + + next_best <- + lapply(non_dom, function(x) + x$L1[which(x$L1 == "Cabozantinib plus nivolumab")-1] + ) + + incrementals <- lapply(non_dom, function(x) + x[which(x$L1 == "Cabozantinib plus nivolumab"),c("ic","iq","ICER")]) + + for (j in 1:3) { + scenarios[[j]] <- rbind(scenarios[[j]], cbind(scenario, next_best = next_best[j], incrementals[[j]])) + } +} + +write.csv(as.matrix(scenarios[[1]]), file = "4_Output/tables_for_report_PAS_TO BE DELETED/scenarios/pop1.csv", row.names = F) +write.csv(as.matrix(scenarios[[2]]), file = "4_Output/tables_for_report_PAS_TO BE DELETED/scenarios/pop2.csv", row.names = F) +write.csv(as.matrix(scenarios[[3]]), file = "4_Output/tables_for_report_PAS_TO BE DELETED/scenarios/pop3.csv", row.names = F) + + +#population 3 analyses fail in most cases - correcting manually. + +#Manual extractions +#Scenario 1 + #pop_1 + nondom <- Scenario_1_PAS$ps$incremental$pop_1$non_dominated + nondom$drug <- i$r_pld_lookup_mol$Description[match(nondom$L1, i$r_pld_lookup_mol$Number)] + nondom <- nondom[,c(2,9,3,4,1,5:8)] + + ce <- Scenario_1_PAS$ps$tables$top_line$pop_1 + ce$drug <- i$r_pld_lookup_mol$Description[match(ce$L1, i$r_pld_lookup_mol$Number)] + ce <- ce[,c(4,5,1,2,3)] + ce <- ce[order(ce$costs),] + + round(ce[ce$drug == "Cabozantinib plus nivolumab",costs] - ce[ce$drug == "Pazopanib",costs],2) + round(ce[ce$drug == "Cabozantinib plus nivolumab",qalys] - ce[ce$drug == "Pazopanib",qalys],3) + #pop_2 + + nondom <- Scenario_1_PAS$ps$incremental$pop_2$non_dominated + nondom$drug <- i$r_pld_lookup_mol$Description[match(nondom$L1, i$r_pld_lookup_mol$Number)] + nondom <- nondom[,c(2,9,3,4,1,5:8)] + + #pop_3 + + nondom <- Scenario_1_PAS$ps$incremental$pop_3$non_dominated + nondom$drug <- i$r_pld_lookup_mol$Description[match(nondom$L1, i$r_pld_lookup_mol$Number)] + nondom <- nondom[,c(2,9,3,4,1,5:8)] + + nondom + +#Scenario 2 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + nondom <- pas$mk$weighted_incremental$pop_1$non_dominated + #pop_2 + nondom <- pas$mk$weighted_incremental$pop_2$non_dominated + #pop_3 + nondom <- pas$mk$weighted_incremental$pop_3$non_dominated + +#Scenario 3 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$mk$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$mk$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$mk$weighted_incremental$pop_3$non_dominated + +#Scenario 4 + #use base_Case for this (undisc) + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + #pop_1 + pop1 <- pas$weighted_model_undisc$pop_1 + pop1$costs <- apply(pop1[,2:27],1,sum) + pop1$qalys <- apply(pop1[,28:44],1,sum) + pop1 <- pop1[,c(1,54,55)] + pop1$drug <- i$r_pld_lookup_mol$Description[match(pop1$L1, i$r_pld_lookup_mol$Number)] + pop1 <- pop1[,c(1,4,2,3)] + pop1 <- pop1[order(pop1$cost),] + + pop1 + #comparator = sunitinib + round(pop1[pop1$drug == "Cabozantinib plus nivolumab",costs] - pop1[pop1$drug == "Sunitinib",costs]) + round(pop1[pop1$drug == "Cabozantinib plus nivolumab",qalys] - pop1[pop1$drug == "Sunitinib",qalys],3) + + round((pop1[pop1$drug == "Cabozantinib plus nivolumab",costs] - pop1[pop1$drug == "Sunitinib",costs]) / + (pop1[pop1$drug == "Cabozantinib plus nivolumab",qalys] - pop1[pop1$drug == "Sunitinib",qalys])) + + #pop_2 + pop <- pas$weighted_model_undisc$pop_2 + pop$costs <- apply(pop[,2:27],1,sum) + pop$qalys <- apply(pop[,28:44],1,sum) + pop <- pop[,c(1,54,55)] + pop$drug <- i$r_pld_lookup_mol$Description[match(pop$L1, i$r_pld_lookup_mol$Number)] + pop <- pop[,c(1,4,2,3)] + pop <- pop[order(pop$cost),] + + pop + #comparator = sunitinib + round(pop[pop$drug == "Cabozantinib plus nivolumab",costs] - pop[pop$drug == "Sunitinib",costs]) + round(pop[pop$drug == "Cabozantinib plus nivolumab",qalys] - pop[pop$drug == "Sunitinib",qalys],3) + + round((pop[pop$drug == "Cabozantinib plus nivolumab",costs] - pop[pop$drug == "Sunitinib",costs]) / + (pop[pop$drug == "Cabozantinib plus nivolumab",qalys] - pop[pop$drug == "Sunitinib",qalys])) + + #pop_3 + pop <- pas$weighted_model_undisc$pop_3 + pop$costs <- apply(pop[,2:27],1,sum) + pop$qalys <- apply(pop[,28:44],1,sum) + pop <- pop[,c(1,54,55)] + pop$drug <- i$r_pld_lookup_mol$Description[match(pop$L1, i$r_pld_lookup_mol$Number)] + pop <- pop[,c(1,4,2,3)] + pop <- pop[order(pop$cost),] + + pop + #cabo+nivo dominated + +#Scenario 5 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$mk$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$mk$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$mk$weighted_incremental$pop_3$non_dominated + +#Scenario 6 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$mk$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$mk$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$mk$weighted_incremental$pop_3$non_dominated + +#Scenario 7 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + nondom <- pas$ps$incremental$pop_1$non_dominated + nondom$drug <- i$r_pld_lookup_mol$Description[match(nondom$L1, i$r_pld_lookup_mol$Number)] + nondom <- nondom[,c(2,9,3,4,1,5:8)] + nondom + #pop_2 + nondom <- pas$ps$incremental$pop_2$non_dominated + nondom$drug <- i$r_pld_lookup_mol$Description[match(nondom$L1, i$r_pld_lookup_mol$Number)] + nondom <- nondom[,c(2,9,3,4,1,5:8)] + nondom + #pop_3 + nondom <- pas$ps$incremental$pop_3$non_dominated + nondom$drug <- i$r_pld_lookup_mol$Description[match(nondom$L1, i$r_pld_lookup_mol$Number)] + nondom <- nondom[,c(2,9,3,4,1,5:8)] + nondom + +#Scenario 8 + # n/a'd in table? + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$weighted_incremental$pop_3$non_dominated + + +#Scenario 9 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$mk$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$mk$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$mk$weighted_incremental$pop_3$non_dominated + +#Scenario 10 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + nondom <- pas$mk$weighted_incremental$pop_1$non_dominated + nondom$drug <- i$r_pld_lookup_mol$Description[match(nondom$L1, i$r_pld_lookup_mol$Number)] + nondom <- nondom[,c(1,9,2:8)] + nondom + #pop_2 + nondom <- pas$mk$weighted_incremental$pop_2$non_dominated + nondom$drug <- i$r_pld_lookup_mol$Description[match(nondom$L1, i$r_pld_lookup_mol$Number)] + nondom <- nondom[,c(1,9,2:8)] + nondom + #pop_3 + nondom <- pas$mk$weighted_incremental$pop_3$non_dominated + nondom$drug <- i$r_pld_lookup_mol$Description[match(nondom$L1, i$r_pld_lookup_mol$Number)] + nondom <- nondom[,c(1,9,2:8)] + nondom + +#Scenario 11 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$mk$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$mk$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$mk$weighted_incremental$pop_3$non_dominated + +#Scenario 12 + #TO FOLLOW? + +#Scenario 13 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$weighted_incremental$pop_3$non_dominated + +#Scenario 14 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$mk$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$mk$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$mk$weighted_incremental$pop_3$non_dominated + +#Scenario 15 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$mk$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$mk$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$mk$weighted_incremental$pop_3$non_dominated + +#Scenario 16 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$mk$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$mk$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$mk$weighted_incremental$pop_3$non_dominated + +#Scenario 17 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$mk$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$mk$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$mk$weighted_incremental$pop_3$non_dominated + +#Scenario 18 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$mk$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$mk$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$mk$weighted_incremental$pop_3$non_dominated + +#Scenario 19 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$mk$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$mk$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$mk$weighted_incremental$pop_3$non_dominated + +#Scenario 20 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$weighted_incremental$pop_3$non_dominated + + +#Scenario 21 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$mk$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$mk$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$mk$weighted_incremental$pop_3$non_dominated + +#Scenario 22 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$mk$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$mk$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$mk$weighted_incremental$pop_3$non_dominated + +#Scenario 23 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$mk$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$mk$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$mk$weighted_incremental$pop_3$non_dominated + +#Scenario 24 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$mk$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$mk$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$mk$weighted_incremental$pop_3$non_dominated + +#Scenario 25 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$mk$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$mk$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$mk$weighted_incremental$pop_3$non_dominated + +#Scenario 26 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pop <- pas$tables$top_line$pop_1 + pop$drug <- i$r_pld_lookup_mol$Description[match(pop$L1, i$r_pld_lookup_mol$Number)] + pop <- pop[,c(4:5,1:3)] + pop + #pop_2 + pas$incremental$pop_2$non_dominated + #pop_3 + pas$incremental$pop_3$non_dominated + +#Scenario 27 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$weighted_incremental$pop_3$non_dominated + +# Scenarios 28-37 + #TO FOLLOW + +#Scenario 38 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$mk$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$mk$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$mk$weighted_incremental$pop_3$non_dominated + +#Scenario 39 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$weighted_incremental$pop_3$non_dominated + +#Scenario 40 + #TO FOLLOW + +#Scenario 41 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$mk$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$mk$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$mk$weighted_incremental$pop_3$non_dominated + +#Scenario 42 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$weighted_incremental$pop_3$non_dominated + +#Scenarios 43-51 + #TO FOLLOW + +#Scenario 52 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files")) + + #pop_1 + pas$mk$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$mk$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$mk$weighted_incremental$pop_3$non_dominated + + + +# LIST PRICES +#Base Case + #RERUNNING + # pas <- readRDS(rstudioapi::selectFile( + # path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files/list prices")) + # #pop_1 + # pop <- pas$weighted_model_disc$pop_1 + # colnames(pop)[2:27] + # pop$costs <- apply(pop[,2:27],1,sum) + # pop$qalys <- apply(pop[,28:44],1,sum) + # pop <- pop[,c(1,45,46)] + # pop$drug <- i$r_pld_lookup_mol$Description[match(pop$L1, i$r_pld_lookup_mol$Number)] + # pop <- pop[,c(1,4,2,3)] + # pop <- pop[order(pop$cost),] + # pop + # + # pas$weighted_incremental$pop_3$non_dominated + # + # + # #pop_2 + # pop <- pas$weighted_model_undisc$pop_2 + # pop$costs <- apply(pop[,2:27],1,sum) + # pop$qalys <- apply(pop[,28:44],1,sum) + # pop <- pop[,c(1,54,55)] + # pop$drug <- i$r_pld_lookup_mol$Description[match(pop$L1, i$r_pld_lookup_mol$Number)] + # pop <- pop[,c(1,4,2,3)] + # pop <- pop[order(pop$cost),] + # + # pop + # #comparator = sunitinib + # round(pop[pop$drug == "Cabozantinib plus nivolumab",costs] - pop[pop$drug == "Sunitinib",costs]) + # round(pop[pop$drug == "Cabozantinib plus nivolumab",qalys] - pop[pop$drug == "Sunitinib",qalys],3) + # + # round((pop[pop$drug == "Cabozantinib plus nivolumab",costs] - pop[pop$drug == "Sunitinib",costs]) / + # (pop[pop$drug == "Cabozantinib plus nivolumab",qalys] - pop[pop$drug == "Sunitinib",qalys])) + # + # #pop_3 + # pop <- pas$weighted_model_undisc$pop_3 + # pop$costs <- apply(pop[,2:27],1,sum) + # pop$qalys <- apply(pop[,28:44],1,sum) + # pop <- pop[,c(1,54,55)] + # pop$drug <- i$r_pld_lookup_mol$Description[match(pop$L1, i$r_pld_lookup_mol$Number)] + # pop <- pop[,c(1,4,2,3)] + # pop <- pop[order(pop$cost),] + # + # pop + # #cabo+nivo dominated + + +#Scenario 1 + # NOTE: use file ps_model_Scenario_1.rds for this scenario + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files/list prices")) + + #pop_1 + pop <- pas$tables$top_line$pop_1 + pop$drug <- i$r_pld_lookup_mol$Description[match(pop$L1, i$r_pld_lookup_mol$Number)] + pop <- pop[,c(4:5,1:3)] + pop <- pop[order(pop$cost),] + pop + + pas$incremental$pop_1 + + #pop_2 + pas$incremental$pop_2$non_dominated + + #pop_3 + pas$incremental$pop_3$non_dominated + +#Scenario 2 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files/list prices")) + + #pop_1 + pas$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$weighted_incremental$pop_3$non_dominated + +#Scenario 3 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files/list prices")) + + #pop_1 + pas$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$weighted_incremental$pop_3$non_dominated + +#Scenario 4 (Use Base Case file) + #RERUNNING BASE CASE + # pas <- readRDS(rstudioapi::selectFile( + # path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files/list prices")) + # + # #pop_1 + # pas$weighted_incremental$pop_1$non_dominated + # #pop_2 + # pas$weighted_incremental$pop_2$non_dominated + # #pop_3 + # pas$weighted_incremental$pop_3$non_dominated + +#Scenario 5 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files/list prices")) + + #pop_1 + pas$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$weighted_incremental$pop_3$non_dominated + +#Scenario 6 + pas <- readRDS(rstudioapi::selectFile( + path = "E:/University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files/list prices")) + + #pop_1 + pas$weighted_incremental$pop_1$non_dominated + #pop_2 + pas$weighted_incremental$pop_2$non_dominated + #pop_3 + pas$weighted_incremental$pop_3$non_dominated + \ No newline at end of file diff --git a/2_Scripts/standalone scripts/results processing/output_script_old.R b/2_Scripts/standalone scripts/results processing/output_script_old.R new file mode 100644 index 0000000..f6a5bfd --- /dev/null +++ b/2_Scripts/standalone scripts/results processing/output_script_old.R @@ -0,0 +1,1707 @@ +#### 1. Installation ########### +#### This code has been created using R version 4.3.1 +#### All packages used by this model are provided here + +#### Comment out the below section which installs the relevant packages after the first run of the model +# install.packages("shiny", quiet = TRUE) ### the quiet argument is used to avoid warnings appearing in the console (useful for later conversion to web app) +# install.packages("gtools", quiet = TRUE) +# install.packages("openxlsx", quiet = TRUE) +# install.packages("flexsurv", quiet = TRUE) +# install.packages("tidyverse", quiet = TRUE) +# install.packages("data.table", quiet = TRUE) +# install.packages("heemod", quiet = TRUE) +# install.packages("logOfGamma", quiet = TRUE) +# install.packages("ggplot2", quiet = TRUE) +# install.packages("survminer", quiet = TRUE) +# install.packages("officer", quiet = TRUE) +# install.packages("officedown", quiet = TRUE) +# install.packages("magrittr", quiet = TRUE) +# install.packages("Hmisc", quiet = TRUE) +# install.packages("future.apply", quiet = TRUE) +# install.packages("crosstable", quiet = TRUE) +# install.packages("flextable", quiet = TRUE) +# install.packages("stringr", quiet = TRUE) +# install.packages("BCEA", quiet = TRUE) +# install.packages("collapse", quiet = TRUE) +# install.packages("scales", quiet = TRUE) +# install.packages("Matrix", quiet = TRUE) +# install.packages("dplyr", quiet = TRUE) + +### Loading libraries + +#### This section needs to be run every time and calls each package from the library +library(shiny, quiet = TRUE) +library(gtools, quiet = TRUE) +library(openxlsx, quiet = TRUE) +library(flexsurv, quiet = TRUE) +library(tidyverse, quiet = TRUE) +library(data.table, quiet = TRUE) +library(heemod, quiet = TRUE) +library(logOfGamma, quiet = TRUE) +library(ggplot2, quiet = TRUE) +library(survminer, quiet = TRUE) +library(officer, quiet = TRUE) +library(officedown, quiet = TRUE) +library(magrittr, quiet = TRUE) +library(Hmisc, quiet = TRUE) +library(future.apply, quiet = TRUE) +library(crosstable, quiet = TRUE) +library(flextable, quiet = TRUE) +library(stringr, quiet = TRUE) +library(BCEA, quiet = TRUE) +library(collapse, quiet = TRUE) +library(scales, quiet = TRUE) +library(Matrix, quiet = TRUE) +library(dplyr, quiet = TRUE) + +#### 2. Loading functions ########### + + +# This variable is used throughout the model to define whether to provide additional outputs useful for QC or not +# The model will take longer to run when this is set to TRUE +qc_mode <- FALSE + + +# This function allows parallel processing (similar to future apply) +plan(multisession) + +options(crosstable_units="cm") + +# 2.1. Excel data extraction functions ----------------------------------------- + +#### These functions are used to extract parameters from the Excel input workbook for use in R +#### During Phase 2 a Shiny front-end will be added to the model which will allow an alternative mechanism to upload these types of inputs + +source(file.path("./3_Functions/excel/extract.R")) + +# 2.2. Treatment sequencing functions ---------------------------------------- + +#### Function: filter to active treatments and lines +##### Takes as an input the defined sequences, evaluation type and line to start the evaluation from +##### Other input is % receiving each subs therapy at each line dependent on previous treatments received +##### Reweights so that the % receiving each treatment sums to 100% within each arm / line being studied +##### Outputs a matrix that has the % receiving each possible combination + +source("./3_Functions/sequencing/sequences.R") + +# 2.3. Survival analysis functions --------------------------------------------- + +# Function: conduct survival analysis +##### by treatment, line, population and outcome fitted survival curves using Flexsurvreg (exp, Weibull, lognormal, loglog, Gompertz, gen gamma) +##### calculation of and adjustment for general population +##### adjustment for treatment effect waning + +source("./3_Functions/survival/Survival_functions.R") +source("./3_Functions/survival/other_cause_mortality.R") +source("./3_Functions/survival/treatment_effect_waning.R") + +# 2.4 Misc functions ---------------------------------------------------------- + +### these functions enable smoother data cleaning and manipulation + +source("./3_Functions/misc/other.R") +source("./3_Functions/misc/shift_and_pad.R") +source("./3_Functions/misc/cleaning.R") + +# 2.4.1 Functions imposing list structures ----------------------------------- + +source("./3_Functions/misc/nesting.R") +source("./3_Functions/misc/discounting.R") +source("./3_Functions/misc/qdirichlet.R") +source("./3_Functions/misc/plotting.R") +source("./3_Functions/misc/structure.R") + + +# 2.5 Utility functions ------------------------------------------------------- + +source("./3_Functions/utility/age_related.R") +source("./3_Functions/costs_and_QALYs/utility_processing.R") + +# 2.6 AE functions -------------------------------------------------------- + +source("./3_Functions/adverse_events/AE_steps.R") + +# 2.7 Cost calculation functions -------------------------------------------- + +source("./3_Functions/costs_and_QALYs/cost_processing.R") + + +# 2.8 State transition modelling functions -------------------------------- + +source("./3_Functions/markov/markov.R") + +# 2.9 Patient flow functions ---------------------------------------------- + +source("./3_Functions/patient_flow/overarching.R") +source("./3_Functions/patient_flow/partitioned_survival.R") +source("./3_Functions/patient_flow/markov.R") +source("./3_Functions/patient_flow/drug_costs.R") +source("./3_Functions/patient_flow/hcru_costs.R") +source("./3_Functions/patient_flow/qalys.R") +source("./3_Functions/patient_flow/ae.R") + + + +# 2.10 Results processing functions --------------------------------------- + +source("./3_Functions/results/incremental_analysis.R") +source("./3_Functions/results/model_averaging.R") +source("./3_Functions/results/partitioned_survival.R") +source("./3_Functions/misc/severity_modifier.R") +source("./3_Functions/results/results_tables.R") + +# 3. Model inputs structure -------------------------------------------------- + +# Model inputs should be in a list called i. This list then contains all of the +# inputs for the model, NOT the parameters used to calculate the model. In effect, +# this is a place to store all model information BEFORE it gets boiled down to +# what's needed to run 1 model. +# +# using i allows subsetting by categorisation, which makes things a lot easier +# to find and avoids all long variable names +# +# the structure of i should be by category. There are the following +# categories: +# +# dd - dropdown inputs taken from Excel +# i - parameter inputs taken from Excel +# r_ tables taken from Excel +# List, id and lookup - lists defined and used within the code +# basic - basic inputs (time horizon, cycle length, discount rates, so on so forth) +# surv - survival analysis inputs including raw data +# sequences and seq - inputs and outputs related to the possible sequences of treatments +# cost - drug and hcru costs. All costs are here to keep things together (dosing is not cost) +# util and QALYs - utility and QALY inputs +# misc - misc inputs e.g. graph labelling +# + +#### 3.1 Loading input parameters ########### + +# This model allows two possible structures to be analysed: state transition with a user definable number of lines +# with health states based on time to discontinuation (drug costs) and progression status (quality of life and movement +# between lines) and PartSA with 3 health states (pre-progression, post-progression and death) + +# During Phase 1 of this pilot we use the model to evaluate the decision problem for a single therapy +# (cabo+nivo, defined as molecule 1) starting at 1st line +# During Phase 2 we will adapt this code to evaluate the cost-effectiveness of sequences starting at a user-defined line + +# Inputs to this model need to be downloaded from NICEdocs + +User_types <- c("Submitting company", "NICE", "EAG", "Committee", "NHSE", "Clinical expert", "Patient expert", "Non-intervention stakeholder", "Public") + +# The submitting company are able to see their own CIC and AIC data (marked up blue / yellow in reporting but not anything else: green marking +# green marked data has been either be replaced with 0 [PAS discounts, RWE IPD] or dummy data) +# NICE users will be able to see everything +# Other users will not be able to see any marked data, this is replaced with dummy data + +# The way raw data is fed into the model currently works as follows +# Define the path to where the data file lives using the select file functionality + +# The model then processes the file the user selected + +# There are a number of files which contain raw or intermediate inputs: +# 1. The Excel user interface - this contains information from company data and the UK RWE +# 2. The proportional hazards NMA CODA RDS file - this contains information from company data +# 3. The fractional polynomials NMA RDS file - this contains information from company data +# 4. Either the raw data file containing the pseudo-IPD for all trials for survival analysis (RWE and company data included); or +# 5. The RDS output from the survival analysis using both RWE and company data + + + + +################# START REPORT GENERATION ####################### + +#note - change stem to wherever your sharepoint files are kept. Subsequent lines should work ok +stem <- "E:/" + +rds_file_path <- paste0(stem,"University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Results rds files") +output_file_path <- "./4_Output" +scenario_files_path <- paste0(stem,"University of Exeter/PATT Pathways - Renal cell carcinoma - Documents/04-2 ERG Model/Inputs front end file/Scenario set up files") + +#get list of rds files +files <- list.files(rds_file_path) +files <- files[grepl("Scenario", files)] + +for (f in files) { + # for each rds - does the relevant word file exist? (i.e. already run). If yes skip. + # update list of output files + output_files <- list.files(output_file_path) + output_files <- output_files[grepl("Scenario", output_files)] + + # extract scenario number and PAS/list prices + scenario <- substr(f, 9, regexpr("price", f)[1]-2) + # check if word file already exists, if so skip + if (length(output_files)>0) { + if (sum(grepl(scenario, output_files)) > 0) { + cat(f,": word output report already exists. Skipping.\n") + next + } + } + #match to correct input file (scenario number and PAS/list) + scenario_nr <- as.numeric(substr(scenario,1,regexpr("_", scenario)[1]-1)) + price_type <- substr(scenario,regexpr("_", scenario)[1] + 1, nchar(scenario)) + + if (price_type == "List") { + price_directory <- "/List price" + } else if (price_type == "PAS") { + price_directory <- "/PAS price" + } else { + stop("Failed to itentify price type for scenario") + } + + excel_path <- paste0(scenario_files_path, price_directory,"/Scenario ", scenario_nr, ".xlsm") + + if (!file.exists(excel_path)) { + stop("Unable to find scenario inputs file ", excel_path) + } + + # The first part of this code pulls all of the named ranges from the excel workbook, expand the parameters table + + i <- f_excel_extract(excel_path, verbose = TRUE) + i <- c(i,f_excel_cleanParams(i$R_table_param)) + + + + # Set which decision problem to look at, initially functionality has been geared towards the decision problem for cabozantinib plus nivolumab + i$decision_problem <- "cabo+nivo" + + # We then create a place for identifiers. Adding in an object to i full of lookup tables makes automated translation + # possible even when one doesn't know the number of items ex ante, or how they combine. + # + # If the lookup table is correct one can translate id numbers to text strings which are + # consistent throughout the entire model. This is extremely useful as the model can + # be expanded to any number of treatments and potentially even any number of lines + # (up to a reasonable maximum) + + i$id <- list(ipd = list()) + i$lookup <- list(ipd = list()) + + # Add distribution names to i + # This model only includes standard parametric distributions as more complex distributions were not deemed to be required for the included treatments + + i$distnames <- + c( + gengamma = "gengamma", + exp = "exp", + weibull = "weibull", + lnorm = "lnorm", + gamma = "gamma", + gompertz = "gompertz", + llogis = "llogis" + ) + + + + # The next step is to then "tidy up" i into another object, p. p doesn't necessarily + # have to house everything, only things that will change in PSA + + # PSA is not yet included in this model, this will be added during technical engagement + + # For the future: this is what a PSA would look like. Note that each p in p_psa + # needs to pull through one probabilistic iteration of all parameters. + # + # future_lapply(1:n_psa, function(psa_iteration) { + # generate_pf(p_psa[[psa_iteration]], structure=i$...) + # }) + # + + + p <- f_misc_param_generate_p(i) + + # Set seed for PSA + set.seed(1475) + + # Pass this into p so that p can be used to exclusively compute the model: + p$basic$decision_problem <- i$decision_problem + + + + + i$surv <- list() + + #### Read in survival data from Excel workbook + + # Pull out the raw data from the IPD excel book - one named range per treatment at each line + # Each reference curve is defined in Excel as time (weeks), event/censor (event coded as 1, censor as 0), patient group, line, molecule, trial and endpoint + # Pull all of the named ranges from the excel workbook, expand the parameters table + + excel_path2 <- "./1_Data/IPD_Confidential _Rinput.xlsx" + + wb <- f_excel_extract(excel_path2, verbose = TRUE) + + i$surv$pld <- as.data.table(wb$`_xlnm._FilterDatabase`) + + rm(wb) + + # Some small cleaning of the PLD. + i$surv$pld <- i$surv$pld[,list(population,line,molecule,trial,endpoint,timew,event_censor)] + + # Do not allow zero survival times, they have to be at least 1 day. the TUotA is + # weeks, so 1 day is 1/7 weeks: + i$surv$pld[timew ==0,"timew"] <- 1/7 + + # The named range r_pld has numeric identifiers for: + # + # - pop + # - line + # - mol (i.e., regimen - combination therapies are under the same number) + # - trial (trial id WITHIN population line and molecule to set them apart from each other - usually just 1!) + # - endpoint + + # These numeric identifiers are then used to create a nested list of survival regression models and + # extrapolations. The extrapolations are filtered down to the extrapolations that are selected + # within the excel input sheet, but the rest are kept here in i in case of scenario analysis. + # + # Note that the lookup tables in the next section are used to translate these numbers + # into human-readable identifiers. + + # 3.3.2 Data identification ------------------------------------------ + + # There is a lot of nesting involved in this part of the analysis, with population line, regimen trial and endpoint + # making a total of 5 layers of nesting to automatically go through each endpoint for each trial for + # each regimen for each line for each population, perform all regression analyses, produce parameters + # and have an easily identifiable (and therefore programmable) spaces for the results of each analysis + # which can then be spat out into reporting. + + # The first step is to break up r_pld into separate datasets depending on the identifiers. A function + # is used to do this which returns nothing if such data for one id set doesn't exist. + # + # Note that at this stage it is just those contexts which HAVE got PLD which are to be organised. + # For those endpoints and so on that do not have data, a separate step after this one to populate + # every endpoint for every treatment line for every treatment sequence is performed. + + i$id$ipd <- list( + pop = i$r_pld_lookup_pop$Number[!is.na(i$r_pld_lookup_pop$Number)], + line = i$r_pld_lookup_line$Number[!is.na(i$r_pld_lookup_line$Number)], + mol = i$r_pld_lookup_mol$Number[!is.na(i$r_pld_lookup_mol$Number)], + trial = i$r_pld_lookup_trial$Number[!is.na(i$r_pld_lookup_trial$Number)], + endpoint = i$r_pld_lookup_endpoint$Number[!is.na(i$r_pld_lookup_endpoint$Number)] + ) + + names(i$id$ipd$pop) <- paste0("pop_" , i$id$ipd$pop) + names(i$id$ipd$line) <- paste0("line_" , i$id$ipd$line) + names(i$id$ipd$mol) <- paste0("mol_" , i$id$ipd$mol) + names(i$id$ipd$trial) <- paste0("trial_" , i$id$ipd$trial) + names(i$id$ipd$endpoint) <- paste0("endpoint_", i$id$ipd$endpoint) + + + # to see this, we have: + #i$id$ipd + + # Generating the same structure but with the translation table from number to + # text: + + i$lookup$ipd <- list( + pop = data.table(i$r_pld_lookup_pop)[Description != 0], + line = data.table(i$r_pld_lookup_line)[Description != 0], + mol = data.table(i$r_pld_lookup_mol)[Description != 0], + trial = data.table(i$r_pld_lookup_trial)[Description != 0], + endpoint = data.table(i$r_pld_lookup_endpoint)[Description != 0] + ) + + # For treatment line, add a translator for the column in the sequences output: + + i$lookup$ipd$line$seq_col <- paste0("V",2:(nrow(i$lookup$ipd$line)+1)) + i$lookup$ipd$line$R_id <- paste0("line_",1:nrow(i$lookup$ipd$line)) + + i$lookup$dist <- i$r_pld_lookup_dist + + + # This means that you can easily look up things like so: + + # i$lookup$ipd$mol[Number == 1,list(Description,RCC_input_desc)] + # i$lookup$ipd$mol[Number == 2,list(Description,RCC_input_desc)] + # i$lookup$ipd$line[Number == 1,list(Description,RCC_input_desc)] + # i$lookup$ipd$pop[Number == 0,list(Description,RCC_input_desc)] + + # One can also do the opposite, translating input file descriptions into numbers: + + # i$lookup$ipd$mol[RCC_input_desc == "ipi_nivo",list(Description,Number)] + + i$lookup$trt <- i$lookup$ipd$mol$Number + names(i$lookup$trt) <- i$lookup$ipd$mol$RCC_input_desc + names(i$lookup$trt)[length(i$lookup$trt)] <- "BSC" + + # pass to p whenever i$lookup has been populated/updated. + p$basic$lookup <- i$lookup + + + p$basic$lookup$pop_map <- data.table(i$r_overall_lookup_pop) + + + res <- readRDS(paste0(rds_file_path,"/",f)) + + Scenario_number <- i$R_Scenario_num + + Scenario_name <- i$R_Scenario_name # Use ST for state transition, PS for Partitioned survival, LP for list price, cPAS for cPAS + + Run_date <- date() + + # 3.4.2 QALYs ------------------------------------------------------------- + + # Utilities are applied to the disease model by treatment by line and whether the patient is on or off treatment + # Age adjustment is conducted multiplicatively in line with DSU guidance using earlier defined patient characteristics for age and sex + + # Extracting from excel file + + p <- add_population_utility_params(p, psa = FALSE, .i = i) + + # Pre-calculate the population utility norms since they will be the same across + # all sequences (though may vary across populations), and store in p + + i$R_table_ptchar <- as.data.table(i$R_table_ptchar) + + base_utility <- data.frame(cycle = 0:p$basic$th, utility = 1) + if (i$dd_ageadjuutilities == "Yes") { + if (i$dd_age_sex_source == "Mean") { + # We find the row corresponding to line 1 for each relevant population + + # Do a lot of wrangling to get in the format we want... + ptc_L1 <- i$R_table_ptchar[Treatment.line == 1, c(1, 3, 4)] + colnames(ptc_L1) <- c("Population", "age", "sex") + ptc_L1$sex <- 1 - ptc_L1$sex + ptc_L1 <- merge(ptc_L1, i$lookup$ipd$pop, by.x = "Population", by.y = "Description") + ptc_L1 <- ptc_L1[order(ptc_L1$Number), c("age", "sex", "Number")] + ptc_L1 <- split(ptc_L1[, c("age", "sex")], paste0("pop_", ptc_L1$Number)) + + p$util$gpop <- lapply(ptc_L1, function(pop) adjust_utility( + age = pop$age, + sex = pop$sex, + utilities = base_utility, + .patient_level = FALSE, + .p = p + )) + + } else { + # We will only include IPD from line 1, since the population + # norm is applied according to absolute model time rather than + # than time in state. We don't know which population they are + # in, so we will replicate for pop_0, pop_1 and pop_2. + ipd_L1 <- i$R_table_patientagesex$Line == 1 + p$util$gpop <- list() + p$util$gpop$pop_0 <- adjust_utility( + age = i$R_table_patientagesex$Age[ipd_L1], + sex = if_else(i$R_table_patientagesex$Gender[ipd_L1] == "M", "male", "female"), + utilities = base_utility, + .patient_level = TRUE, + .p = p + ) + p$util$gpop$pop_1 <- p$util$gpop$pop_0 + p$util$gpop$pop_2 <- p$util$gpop$pop_0 + } + } else { + p$util$gpop <- list(pop_0 = 1, pop_1 = 1, pop_2 = 1) + } + + i$QALYs <- list() + + i$QALYs$utilities$means <- f_process_utilities(raw_utilities = i$R_table_util, + PSA = FALSE, + samples = FALSE) + # Sample code for PSA + + if (FALSE) { + i$QALYs$utilities$PSA <- f_process_utilities(i$R_table_util, + PSA = TRUE, + samples = 100) + } + + # For the deterministic analysis, pass the means into p for use in the model. + # We now have our population norms for calculating gpop utility multiplier + # compared to baseline as well as our HSUVs by PLM and treatment status. everything + # we need. + # + # For probabilistic settings we'd need to pass along the nth iteration of i$QALYs$utilities$PSA + # into the nth element in p_psa (i.e. copy of p). Due to size, this may need to be done + # one iteration at a time, in which case p_psa <- p and then replacing elements, then + # running the model saves a lot of memory. + p$util$mk <- data.table(i$QALYs$utilities$means) + + + # 3.6.2 Calculate severity modifier ----------------------------------- + + # The severity modifier for the weighted average first-line treatment comparison + # uses the best available treatment which is not nivo cabo (molecule 1) for the + # first 3 populations. + # + # This is because this is the best (i.e. most discounted QALYs) available 1L trt + # pathway set. + # + # Calculation is only provided for the state transition model + + if(p$basic$decision_problem == "cabo+nivo") { + p$basic$pops_to_run <- 1:3 + } else { + p$basic$pops_to_run <- NULL + } + + if (p$basic$structure == "State transition") { + + population_numbers <- if(sum(p$basic$pops_to_run == 1:3)>0){1:3} else{1:6} + + res$mk$qaly_shortfall_1_to_3 <- lapply(population_numbers, function(npa_pop) { + + lu_pop <- p$basic$lookup$pop_map + lu_rpop <- p$basic$lookup$ipd$pop + + # npa_pop is overall population, we need to look up risk population from it: + + risk_pop_n <- lu_pop[match(npa_pop,lu_pop$Overall.population.number),]$Risk.population.number + risk_pop <- lu_rpop[match(risk_pop_n,lu_rpop$Number),]$Description + + i$R_table_ptchar <- as.data.table(i$R_table_ptchar) + + if (i$dd_age_sex_source == "Mean") { + + # So for this risk population, we need the baseline characteristics: + bl_chars <- i$R_table_ptchar[Population == risk_pop & Treatment.line == 1,] + bl_age <- bl_chars$Starting.age..years..Mean + bl_male <- 1-bl_chars$Starting...female.Mean + + } else { + + patient_sex_age_IPD <- as.data.table(i$R_table_patientagesex) + patient_sex_age_IPD$Gender <- replace(patient_sex_age_IPD$Gender, patient_sex_age_IPD$Gender=="M","male") + patient_sex_age_IPD$Gender <- replace(patient_sex_age_IPD$Gender, patient_sex_age_IPD$Gender=="F","female") + + bl_age <- patient_sex_age_IPD[Line ==1]$Age + bl_male <- patient_sex_age_IPD[Line ==1]$Gender + + } + + pna_txt <- names(res$wa_summarised)[npa_pop] + + tab <- res$wa_summarised[[pna_txt]][L1 != 1,] + + met <- tab[which.max(qalys),] + + q_met <- met$qalys + comp_no_met <- met$L1 + + out <- calc_severity_modifier( + age = bl_age, + sex = bl_male, + .patient_level = if(i$dd_age_sex_source == "Mean") {FALSE} else {TRUE}, + qalys = q_met, + .i = i, + .p = p + ) + + out <- cbind(out, SOC = comp_no_met) + + return(out) + + }) + } + + + # 3.6.5 Outputting results to Word ------------------------------------------------------ + + landscape <- prop_section( + page_size = page_size(orient = "landscape") + ) + + portrait <- prop_section( + page_size = page_size(orient = "landscape") + ) + + + # Make a word document and add a table of contents to it with 4 levels. + # This uses a template word document so that styles and so on are the same. + + i$dd_producewordreport <- "Yes" + + if (i$dd_producewordreport == "Yes") {word_results <- TRUE} else {word_results <- FALSE} + + + if (word_results == TRUE) { + + doc_res <- read_docx("./3_Functions/reporting/empty results doc.docx") + + # Add a 1st level header for the overall population: + doc_res <- doc_res %>% + body_add_par(paste0("Results of Model Run in R Scenario name: " , Scenario_name),style = "heading 1") %>% + body_add_par(paste0("Date and time run: ", Run_date)) + + + Word_width_inches <- 29.7*0.3937 # width of the side borders in the word_document output (in centimeters) + + # Producing report tables (state transition model) ------------------------------------------------------ + + + # Make a word document containing results tables using the object res + + # Produces a different format depending on model structure + + if(p$basic$structure=="State transition") { + + # make a table by overall population for the summary results + ft_basic_bop <- do.call(rbind,lapply(structure(names(res$wa_summarised),.Names=names(res$wa_summarised)), function(popu_txt) { + + lu_pop <- p$basic$lookup$pop_map + lu_mol <- p$basic$lookup$ipd$mol + + popu <- res$wa_summarised[[popu_txt]] + popu_n <- as.numeric(gsub("pop_","",popu_txt)) + + # seq_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Sequencing.population + rsk_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Risk.population + + # popu$seq_pop <- seq_popu_lab + popu$risk_pop <- rsk_popu_lab + popu$L1 <- lu_mol[match(popu$L1,lu_mol$Number),]$Description + + return(popu) + + })) + + # Now do the same thing for the incremental analysis + ft_wa_inc<- do.call(rbind,lapply(structure(names(res$weighted_incremental),.Names=names(res$weighted_incremental)), function(popu_txt) { + + lu_pop <- p$basic$lookup$pop_map + lu_mol <- p$basic$lookup$ipd$mol + + if (is.null(res$weighted_incremental[[popu_txt]]$non_dominated)) { + popu <- as.data.table(res$weighted_incremental[[popu_txt]]) + popu <- data.table(popu, ic = 0, iq = 0, il = 0, ICER = "Dominant" ) + popu$str_dom <- NULL + + } else { + popu <- as.data.table(res$weighted_incremental[[popu_txt]]$expanded_results) + popu$ICER[popu$extdom==FALSE] <- as.character(paste0("£",round(popu$ICER[popu$extdom==FALSE] ,0))) + popu$ICER[popu$extdom==TRUE] <- "(ext dominated)" + popu$str_dom <- NULL + popu$extdom <- NULL + popu$r <- NULL + + } + + popu_n <- as.numeric(gsub("pop_","",popu_txt)) + + # seq_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Sequencing.population + rsk_popu_lab <- rep(lu_pop[Overall.population.number == popu_n,]$Risk.population,nrow(popu)) + + + # popu$seq_pop <- seq_popu_lab + popu <- cbind(popu, risk_pop=rsk_popu_lab) + + return(popu) + + })) + + + # Create table combining pairwise and incremental ICERs + + setDT(ft_basic_bop)[, risk_pop := str_replace(risk_pop, "Int/poor", "Intermediate / poor risk")] + ft_basic_bop <- ft_basic_bop[order( risk_pop, costs)] # order by increasing costs + + setDT(ft_wa_inc)[, risk_pop := str_replace(risk_pop, "Int/poor", "Intermediate / poor risk")] + ft_wa_inc <- ft_wa_inc[,c(1,3,2,4,5,6,7,8,9)] + + ft_pairwise <- do.call(rbind,lapply(structure(names(res$pairwise_vs_mol),.Names=names(res$pairwise_vs_mol)), function(popu_txt) { + + lu_pop <- p$basic$lookup$pop_map + lu_mol <- p$basic$lookup$ipd$mol + + popu <- res$pairwise_vs_mol[[popu_txt]] + popu_n <- as.numeric(gsub("pop_","",popu_txt)) + + # seq_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Sequencing.population + rsk_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Risk.population + + # popu$seq_pop <- seq_popu_lab + popu$risk_pop <- rsk_popu_lab + + return(popu) + + })) + + + + setDT(ft_pairwise)[, risk_pop := str_replace(risk_pop, "Int/poor", "Intermediate / poor risk")] + + ft_pairwise$Pairwise_ICER <- ft_pairwise$icer + + ft_pairwise$Pairwise_ICER[is.na(ft_pairwise$icer) != TRUE] <- as.character(paste0("£",round(ft_pairwise$icer[is.na(ft_pairwise$icer) != TRUE] ,0))) + + + ft_pairwise[ft_pairwise$icer < 0 & ft_pairwise$iq < 0]$Pairwise_ICER <- "Cabo+nivo dominated" + ft_pairwise[ft_pairwise$icer < 0 & ft_pairwise$iq > 0]$Pairwise_ICER <- "Cabo+nivo dominant" + ft_pairwise[ft_pairwise$icer > 0 & ft_pairwise$iq < 0]$Pairwise_ICER <- paste0("SW quadrant ",ft_pairwise[ft_pairwise$icer > 0 & ft_pairwise$iq < 0]$Pairwise_ICER) + + ft_pairwise <- ft_pairwise[,.SD,.SDcols = c("L1", "costs", "qalys", "ly", "Pairwise_ICER","risk_pop")] + + ft_wa_inc <- merge(ft_pairwise, ft_wa_inc, all.x = TRUE) + + ft_wa_inc <- ft_wa_inc[,c(1,2,4,3,7,9,8,6,10,5)] + + ft_wa_inc <- ft_wa_inc[order( risk_pop, costs)] # order by increasing costs + + ft_wa_inc[is.na(ICER)]$ICER <- "(dominated)" + + + # Pull out nearest comparators + + comparator_no_allrisk <- ff_closest_comparator(res,"pop_1") + comparator_no_favrisk <- ff_closest_comparator(res,"pop_2") + comparator_no_IPrisk <- ff_closest_comparator(res,"pop_3") + + + # Create table for LY breakdown + + # All risk + + cabo_nivo_LY_allrisk <- res$weighted_model_undisc$pop_1[L1 == 1] %>% + select(starts_with("ly")) + + comparator_LY_allrisk <- res$weighted_model_undisc$pop_1[L1 == comparator_no_allrisk] %>% + select(starts_with("ly")) + + + ft_LY_all_tab <- ff_report_outcomes_breakdown( + cabo_nivo_outcomes = cabo_nivo_LY_allrisk, + comparator_outcomes = comparator_LY_allrisk, + comparator_no = comparator_no_allrisk, + LYorQALY = "LY" + ) + + # Fav risk + + cabo_nivo_LY_favrisk <- res$weighted_model_undisc$pop_2[L1 == 1] %>% + select(starts_with("ly")) + + comparator_LY_favrisk <- res$weighted_model_undisc$pop_2[L1 == comparator_no_favrisk ] %>% + select(starts_with("ly")) + + + ft_LY_fav_tab <- ff_report_outcomes_breakdown( + cabo_nivo_outcomes = cabo_nivo_LY_favrisk, + comparator_outcomes = comparator_LY_favrisk, + comparator_no = comparator_no_favrisk, + LYorQALY = "LY") + + + # Int/poor risk + cabo_nivo_LY_IPrisk <- res$weighted_model_undisc$pop_3[L1 == 1] %>% + select(starts_with("ly")) + + comparator_LY_IPrisk <- res$weighted_model_undisc$pop_3[L1 == comparator_no_IPrisk] %>% + select(starts_with("ly")) + + ft_LY_IP_tab <- ff_report_outcomes_breakdown( + cabo_nivo_outcomes = cabo_nivo_LY_IPrisk, + comparator_outcomes = comparator_LY_IPrisk, + comparator_no = comparator_no_IPrisk, + LYorQALY = "LY") + + + + + # Create tables for QALY breakdown + + # All risk + + cabo_nivo_QALY_allrisk <- res$weighted_model_disc$pop_1[L1 == 1] %>% + select(starts_with("qaly")) + + cabo_nivo_AEQALY_allrisk <- cbind(res$weighted_model_disc$pop_1[L1 == 1] %>% + select(starts_with("ae_qaly")),BSC = 0) + + cabo_nivo_QALY_allrisk <-colSums(rbind(cabo_nivo_QALY_allrisk, cabo_nivo_AEQALY_allrisk, use.names=FALSE)) + + cabo_nivo_QALY_allrisk <- cabo_nivo_QALY_allrisk[c(1:2,9,3:8)] + + comparator_QALY_allrisk <- res$weighted_model_disc$pop_1[L1 == comparator_no_allrisk] %>% + select(starts_with("qaly")) + + comparator_AEQALY_allrisk <- cbind(res$weighted_model_disc$pop_1[L1 == comparator_no_allrisk] %>% + select(starts_with("ae_qaly")),BSC = 0) + + comparator_QALY_allrisk <-colSums(rbind(comparator_QALY_allrisk, comparator_AEQALY_allrisk, use.names=FALSE)) + + comparator_QALY_allrisk <- comparator_QALY_allrisk[c(1:2,9,3:8)] + + + ft_QALY_all_tab <- ff_report_outcomes_breakdown( + cabo_nivo_outcomes = cabo_nivo_QALY_allrisk, + comparator_outcomes = comparator_QALY_allrisk, + comparator_no = comparator_no_allrisk, + LYorQALY = "QALY") + + + # Fav risk + + cabo_nivo_QALY_favrisk <- res$weighted_model_disc$pop_2[L1 == 1] %>% + select(starts_with("qaly")) + + cabo_nivo_AEQALY_favrisk <- cbind(res$weighted_model_disc$pop_2[L1 == 1] %>% + select(starts_with("ae_qaly")),BSC = 0) + + cabo_nivo_QALY_favrisk <-colSums(rbind(cabo_nivo_QALY_favrisk, cabo_nivo_AEQALY_favrisk, use.names=FALSE)) + + cabo_nivo_QALY_favrisk <- cabo_nivo_QALY_favrisk[c(1:2,9,3:8)] + + comparator_QALY_favrisk <- res$weighted_model_disc$pop_2[L1 == comparator_no_favrisk] %>% + select(starts_with("qaly")) + + comparator_AEQALY_favrisk <- cbind(res$weighted_model_disc$pop_2[L1 == comparator_no_favrisk] %>% + select(starts_with("ae_qaly")),BSC = 0) + + comparator_QALY_favrisk <-colSums(rbind(comparator_QALY_favrisk, comparator_AEQALY_favrisk, use.names=FALSE)) + + comparator_QALY_favrisk <- comparator_QALY_favrisk[c(1:2,9,3:8)] + + + ft_QALY_fav_tab <- ff_report_outcomes_breakdown( + cabo_nivo_outcomes = cabo_nivo_QALY_favrisk, + comparator_outcomes = comparator_QALY_favrisk, + comparator_no = comparator_no_favrisk, + LYorQALY = "QALY") + + + # Int/poor risk + + cabo_nivo_QALY_IPrisk <- res$weighted_model_disc$pop_3[L1 == 1] %>% + select(starts_with("qaly")) + + cabo_nivo_AEQALY_IPrisk <- cbind(res$weighted_model_disc$pop_1[L1 == 1] %>% + select(starts_with("ae_qaly")),BSC = 0) + + cabo_nivo_QALY_IPrisk <-colSums(rbind(cabo_nivo_QALY_IPrisk, cabo_nivo_AEQALY_IPrisk, use.names=FALSE)) + + cabo_nivo_QALY_IPrisk <- cabo_nivo_QALY_IPrisk[c(1:2,9,3:8)] + + comparator_QALY_IPrisk <- res$weighted_model_disc$pop_3[L1 == comparator_no_IPrisk] %>% + select(starts_with("qaly")) + + comparator_AEQALY_IPrisk <- cbind(res$weighted_model_disc$pop_3[L1 == comparator_no_IPrisk] %>% + select(starts_with("ae_qaly")),BSC = 0) + + comparator_QALY_IPrisk <-colSums(rbind(comparator_QALY_IPrisk, comparator_AEQALY_IPrisk, use.names=FALSE)) + + comparator_QALY_IPrisk <- comparator_QALY_IPrisk[c(1:2,9,3:8)] + + + ft_QALY_IP_tab <- ff_report_outcomes_breakdown( + cabo_nivo_outcomes = cabo_nivo_QALY_IPrisk, + comparator_outcomes = comparator_QALY_IPrisk, + comparator_no = comparator_no_IPrisk, + LYorQALY = "QALY") + + + # Create tables for overall cost breakdown + + cost_type <- c("drug" , "admin" , "mru" , "eol" , "ae_cost") + + populations <- names(res$weighted_model_disc) + + summary_costs_table <- rbindlist(lapply(populations, function(popu) { + treatments <- res$weighted_model_disc[[popu]]$L1 + rbindlist(lapply(treatments, function(mol) { + ff_cost_table( + disc_results = res$weighted_model_disc, + trt_no = mol, + pop = popu + ) + })) + })) + + summary_costs_table[, risk_pop := str_replace(Population, "Int/poor", "Intermediate / poor risk")] + summary_costs_table[, Population := NULL] + + summary_costs_table <- summary_costs_table[order(risk_pop, Total)] # order by increasing costs + + ft_cost_tab <- summary_costs_table %>% + rename(`Risk population` = risk_pop) %>% + as_grouped_data(groups = "Risk population") %>% + as_flextable() %>% + width(., width = (Word_width_inches/(ncol(summary_costs_table)))) %>% + add_header_row(top = TRUE, values = c("","1L costs", "Subsequent treatment", "MRU","",""), colwidths = c(1,3,3,2,1,1)) %>% + theme_box() |> + set_header_labels( + values = list( + Treatment = "Technologies", + L1_drug = "Drug cost", + L1_admin = "Admin cost", + L1_ae = "AE cost", + subs_drug = "Drug cost", + subs_admin = "Admin cost", + subs_ae = "AE cost", + mru_1L = "1L", + subs_mru = "Subsequent treatment", + eol_cost = "EOL cost", + Total = "Total cost" + ) + ) %>% + colformat_double(j=c(2:11), digits = 0, prefix = "£") %>% + add_footer_lines("Abbreviations: admin, administration; AE, adverse event; EOL, end of life; MRU, medical resource use") %>% + # add_header_row(colwidths = c(1,1, 2),values = c("","g1", "g2")) |> + bold( bold = TRUE, part="header") %>% + fontsize(i = NULL, size = 10, part = c("header")) %>% + fontsize(i = NULL, size = 10, part = c("body")) %>% + fontsize(i = NULL, size = 9, part = c("footer")) %>% + align(i = ~ !is.na(`Risk population`), align = "left") %>% + align(i= NULL, align = "center", part = c("header")) %>% + bold(i = ~ !is.na(`Risk population`)) %>% + autofit() %>% + set_table_properties(layout = "autofit") + + + # produce break downs by population + + intervention_name <- p$basic$lookup$ipd$mol[Number == 1]$Description + + # all risk + + comparator_name <- p$basic$lookup$ipd$mol[Number == comparator_no_allrisk]$Description + + cost_breakdown_2 <- rbind( + summary_costs_table[risk_pop == "All risk" & Treatment == intervention_name ], + summary_costs_table[risk_pop == "All risk" & Treatment == comparator_name] + ) + + # reshape the data: + cb2 <- melt.data.table(cost_breakdown_2,id.vars = c("Treatment","risk_pop")) + cb2$risk_pop <- NULL + cb2 <- dcast.data.table(cb2, variable ~ Treatment) + colnames(cb2) <- c("Type", "Int", "Comp") + cb2$Inc <- cb2[,Int] - cb2[,Comp] + cb2$abs <- abs(cb2$Inc) + cb2$abs[10] <- sum(cb2$abs[1:9]) + cb2$abspercent <- cb2$abs / cb2$abs[10] * 100 + + cb2[,1] <- c("Drug acquisition cost (1L)", "Admin cost (1L)", "AE cost (1L)", "Drug acquisition cost (2L+)", "Admin cost (2L+)", "AE cost (2L+)", "MRU 1L", "MRU 2L+", "EOL","Total") + + cost_table_2_allrisk <- ff_cost_byrisk_table(cb2, comparator_no_allrisk) + + # favourable risk + + comparator_name <- p$basic$lookup$ipd$mol[Number == comparator_no_favrisk]$Description + + cost_breakdown_2 <- rbind(summary_costs_table[risk_pop == "Favourable risk" & Treatment == intervention_name ], summary_costs_table[risk_pop == "Favourable risk" & Treatment == comparator_name]) + cost_breakdown_2 <- as.data.table(x = t(cost_breakdown_2), stringsAsFactors = FALSE) + cost_breakdown_2 <- cbind(colnames(summary_costs_table), cost_breakdown_2) + cost_breakdown_2 <- cost_breakdown_2[2:11,] + cost_breakdown_2[,2:3] <- lapply(cost_breakdown_2[,2:3], as.numeric) + colnames(cost_breakdown_2) <- c("Type", "Int", "Comp") + cost_breakdown_2$Inc <- cost_breakdown_2[,Int] - cost_breakdown_2[,Comp] + cost_breakdown_2$abs <- abs(cost_breakdown_2$Inc) + cost_breakdown_2$abs[10] <- sum(cost_breakdown_2$abs[1:9]) + cost_breakdown_2$abspercent <- cost_breakdown_2$abs / cost_breakdown_2$abs[10] * 100 + + cost_breakdown_2[,1] <- c("Drug acquisition cost (1L)", "Admin cost (1L)", "AE cost (1L)", "Drug acquisition cost (2L+)", "Admin cost (2L+)", "AE cost (2L+)", "MRU 1L", "MRU 2L+", "EOL","Total") + + + cost_table_2_favrisk <- ff_cost_byrisk_table(cost_breakdown_2, comparator_no_favrisk) + + + # int / poor risk + + comparator_name <- p$basic$lookup$ipd$mol[Number == comparator_no_IPrisk]$Description + + cost_breakdown_2 <- rbind(summary_costs_table[risk_pop == "Intermediate / poor risk" & Treatment == intervention_name ], summary_costs_table[risk_pop == "Intermediate / poor risk" & Treatment == comparator_name]) + cost_breakdown_2 <- as.data.table(x = t(cost_breakdown_2), stringsAsFactors = FALSE) + cost_breakdown_2 <- cbind(colnames(summary_costs_table), cost_breakdown_2) + cost_breakdown_2 <- cost_breakdown_2[2:11,] + cost_breakdown_2[,2:3] <- lapply(cost_breakdown_2[,2:3], as.numeric) + colnames(cost_breakdown_2) <- c("Type", "Int", "Comp") + cost_breakdown_2$Inc <- cost_breakdown_2[,Int] - cost_breakdown_2[,Comp] + cost_breakdown_2$abs <- abs(cost_breakdown_2$Inc) + cost_breakdown_2$abs[10] <- sum(cost_breakdown_2$abs[1:9]) + cost_breakdown_2$abspercent <- cost_breakdown_2$abs / cost_breakdown_2$abs[10] * 100 + + cost_breakdown_2[,1] <- c("Drug acquisition cost (1L)", "Admin cost (1L)", "AE cost (1L)", "Drug acquisition cost (2L+)", "Admin cost (2L+)", "AE cost (2L+)", "MRU 1L", "MRU 2L+", "EOL","Total") + + + cost_table_2_IPrisk <- ff_cost_byrisk_table(cost_breakdown_2, comparator_no_IPrisk) + + + #### Scenario analysis tables + + + # all risk + + Scenario_table <- ff_scenario_output(res, Scenario_name, comparator_no_allrisk, "pop_1", p$basic$structure) + + Scenario_table_allrisk <- ff_scenario_table(Scenario_table) + + + # favourable risk + + Scenario_table <- ff_scenario_output(res, Scenario_name, comparator_no_favrisk, "pop_2", p$basic$structure) + + Scenario_table_favrisk <- ff_scenario_table(Scenario_table) + + # int/poor risk + + Scenario_table <- ff_scenario_output(res, Scenario_name, comparator_no_IPrisk, "pop_3", p$basic$structure) + + Scenario_table_IPrisk <- ff_scenario_table(Scenario_table) + + # base case table + + ft_basecase <- ft_wa_inc %>% + rename(`Risk population` = risk_pop) %>% + as_grouped_data(groups = "Risk population") %>% + as_flextable() %>% + width(., width = (Word_width_inches/(ncol(ft_wa_inc)))) %>% + theme_box() |> + set_header_labels( + values = list( + L1 = "Technologies", + costs = "Costs (£)", + ly = "LYG", + qalys = "QALYs", + ic = "Inc. Costs", + il = "Inc. LYG", + iq = "Inc. QALYs", + Pairwise_ICER = "ICER cabo + nivo vs comparator", + ICER = "ICER incremental" + ) + ) %>% + flextable::colformat_double(j=c(2,5,8,9), digits = 0, prefix = "£") %>% + flextable::colformat_double(j=c(3,4,6,7), digits = 2) %>% + add_footer_lines("Abbreviations: ICER, incremental cost-effectiveness ratio; LYG, life-years gained; QALY, quality-adjusted life-year") %>% + # add_header_row(colwidths = c(1,1, 2),values = c("","g1", "g2")) |> + bold( bold = TRUE, part="header") %>% + fontsize(i = NULL, size = 10, part = c("header")) %>% + fontsize(i = NULL, size = 10, part = c("body")) %>% + fontsize(i = NULL, size = 9, part = c("footer")) %>% + align(i = ~ !is.na(`Risk population`), align = "left") %>% + bold(i = ~ !is.na(`Risk population`)) + + + # Severity modifier + + severity_table <- data.table(do.call(rbind, res$mk$qaly_shortfall_1_to_3)) + severity_table <- cbind(risk_pop = p$basic$lookup$pop_map$Risk.population[1:3], severity_table) + + severity_table <- rbind(severity_table, f_res_cabonivo_SevMod( + res = res, + oo_pop_string = "Poor / intermediate risk", + pop_n = 3, + comp_numb = 5)) + + severity_table <- rbind(severity_table, f_res_cabonivo_SevMod( + res = res, + oo_pop_string = "Poor / intermediate risk", + pop_n = 3, + comp_numb = 8)) + + setDT(severity_table)[, risk_pop := str_replace(risk_pop, "Favourable risk", "Fav")] + setDT(severity_table)[, risk_pop := str_replace(risk_pop, "All risk", "All")] + + severity_table$SOC <- unlist(lapply(1:nrow(severity_table), function(mol) { + p$basic$lookup$ipd$mol[Number == severity_table$SOC[mol]]$Description } )) + + + + + + ft_severity_mod <- ff_severity_table(severity_table) + + + # Scenario analysis pairwise results + + ft_all_pairwise <- do.call(rbind,lapply(structure(names(res$pairwise_vs_mol),.Names=names(res$pairwise_vs_mol)), function(popu_txt) { + + lu_pop <- p$basic$lookup$pop_map + lu_mol <- p$basic$lookup$ipd$mol + + popu <- res$pairwise_vs_mol[[popu_txt]] + popu_n <- as.numeric(gsub("pop_","",popu_txt)) + + # seq_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Sequencing.population + rsk_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Risk.population + + # popu$seq_pop <- seq_popu_lab + popu$risk_pop <- rsk_popu_lab + + return(popu) + + })) + + ft_all_pairwise$ICER[is.na(ft_all_pairwise$icer) != TRUE] <- as.character(paste0("£",round(ft_all_pairwise$icer[is.na(ft_all_pairwise$icer) != TRUE] ,0))) + + ft_all_pairwise[ft_all_pairwise$icer < 0 & ft_all_pairwise$iq < 0]$ICER <- "Cabo+nivo dominated" + ft_all_pairwise[ft_all_pairwise$icer < 0 & ft_all_pairwise$iq > 0]$ICER <- "Cabo+nivo dominant" + ft_all_pairwise[ft_all_pairwise$icer > 0 & ft_all_pairwise$iq < 0]$ICER <- paste0("SW quadrant ",ft_all_pairwise[ft_all_pairwise$icer > 0 & ft_all_pairwise$iq < 0]$ICER ) + + ft_all_pairwise[,icer:=NULL] + + setDT(ft_all_pairwise)[, risk_pop := str_replace(risk_pop, "Int/poor", "Intermediate / poor risk")] + + ft_all_pairwise_tab <- ff_scenario_pairwise_table(ft_all_pairwise ) + + + # Outputting report (state transition) ------------------------------------------------------ + + # Add base case results. + doc_res <- doc_res %>% + body_add_table_legend(paste0("Base-case results (ordered in increasing cost)"), + bookmark = "tab1") %>% + body_add_flextable(ft_basecase, + align = "left", + topcaption = TRUE, + split = TRUE) %>% + + body_add_break() + + + doc_res <- body_end_section_landscape(doc_res) + + doc_res <- doc_res %>% + body_add_par("Qualification for the severity modifier",style = "heading 2") %>% + body_add_table_legend(paste0("Application of the severity modifier to the base case"), + bookmark = "tab2") %>% + body_add_flextable(ft_severity_mod, + align = "left", + topcaption = TRUE, + split = TRUE) %>% + + body_add_break() + + + doc_res <- doc_res %>% + body_add_par("Breakdowns by health state and cost category",style = "heading 2") %>% + body_add_table_legend(paste0("Summary of LY gain by health state (all risk, cabo+nivo vs next best non-dominated comparator: " ,p$basic$lookup$ipd$mol[Number == comparator_no_allrisk]$Description,")"), + bookmark = "tab3") %>% + body_add_flextable(ft_LY_all_tab, + align = "left", + topcaption = TRUE, + split = TRUE) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend(paste0("Summary of LY gain by health state (favourable risk, cabo+nivo vs next best non-dominated comparator: " ,p$basic$lookup$ipd$mol[Number == comparator_no_favrisk]$Description,")"), + bookmark = "tab4") %>% + body_add_flextable(ft_LY_fav_tab, + align = "left", + topcaption = TRUE, + split = TRUE) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend(paste0("Summary of LY gain by health state (intermediate / poor risk, cabo+nivo vs next best non-dominated comparator: " ,p$basic$lookup$ipd$mol[Number == comparator_no_IPrisk]$Description,")"), + bookmark = "tab5") %>% + body_add_flextable(ft_LY_IP_tab, + align = "left", + topcaption = TRUE, + split = TRUE) %>% + body_add_break() + + + doc_res <- doc_res %>% + body_add_table_legend(paste0("Summary of QALY gain by health state (all risk, cabo+nivo vs next best non-dominated comparator: " ,p$basic$lookup$ipd$mol[Number == comparator_no_allrisk]$Description,")"), + bookmark = "tab1") %>% + body_add_flextable(ft_QALY_all_tab, + align = "left", + topcaption = TRUE, + split = TRUE) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend(paste0("Summary of QALY gain by health state (favourable risk, cabo+nivo vs next best non-dominated comparator: " ,p$basic$lookup$ipd$mol[Number == comparator_no_favrisk]$Description,")"), + bookmark = "tab6") %>% + body_add_flextable(ft_QALY_fav_tab, + align = "left", + topcaption = TRUE, + split = TRUE) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend(paste0("Summary of QALY gain by health state (intermediate / poor risk, cabo+nivo vs next best non-dominated comparator: " ,p$basic$lookup$ipd$mol[Number == comparator_no_IPrisk]$Description,")"), + bookmark = "tab7") %>% + body_add_flextable(ft_QALY_IP_tab, + align = "left", + topcaption = TRUE, + split = TRUE) %>% + body_add_break() + + + doc_res <- body_end_section_portrait(doc_res) + + doc_res <- doc_res %>% + body_add_table_legend(paste0("Summary of costs by health state"), + bookmark = "tab8") %>% + body_add_flextable(ft_cost_tab, + align = "left", + topcaption = TRUE, + split = TRUE) %>% + body_add_break() + + doc_res <- body_end_section_landscape(doc_res) + + doc_res <- doc_res %>% + body_add_table_legend(paste0("Summary of predicted resource use by category of cost (all risk, cabo+nivo vs next best non-dominated comparator: " ,p$basic$lookup$ipd$mol[Number == comparator_no_allrisk]$Description,")"), + bookmark = "tab1") %>% + body_add_flextable(cost_table_2_allrisk , + align = "left", + topcaption = TRUE, + split = TRUE) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend(paste0("Summary of predicted resource use by category of cost (favourable risk, cabo+nivo vs next best non-dominated comparator: " ,p$basic$lookup$ipd$mol[Number == comparator_no_favrisk]$Description,")"), + bookmark = "tab9") %>% + body_add_flextable(cost_table_2_favrisk , + align = "left", + topcaption = TRUE, + split = TRUE) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend(paste0("Summary of predicted resource use by category of cost (intermediate / poor risk, cabo+nivo vs next best non-dominated comparator: " ,p$basic$lookup$ipd$mol[Number == comparator_no_IPrisk]$Description,")"), + bookmark = "tab10") %>% + body_add_flextable(cost_table_2_IPrisk , + align = "left", + topcaption = TRUE, + split = TRUE) %>% + body_add_break() + + + population_table <- as.data.table(cbind(risk_pop = p$basic$lookup$pop_map$Risk.population[1:3],pop_labels = c("pop_1","pop_2","pop_3"))) + setDT(population_table)[, risk_pop := str_replace(risk_pop, "Int/poor", "Intermediate / poor risk")] + + + + for (popu in population_table$pop_labels) { + for(mol in names(res$weighted_trace_plots[[popu]]$plots)) { + print(paste0(popu,mol)) + doc_res <- doc_res %>% body_add_figure_legend( + legend = paste0("Markov trace: ", population_table[pop_labels == popu]$risk_pop , ", ", p$basic$lookup$ipd$mol[Number == str_sub(mol, -1, -1)]$Description), + bookmark = "fig1" + ) %>% + body_add_plot(print(res$weighted_trace_plots[[popu]]$plots[mol]), width = 6) %>% + body_add_par( + paste0( + "Abbreviations: L1, 1st line; L2, 2nd line; L3, 3rd line; L4, 4th line; L5, 5th line" + ), style = "Table footnote" + ) %>% body_add_break() + + } + } + + + doc_res <- doc_res %>% body_add_break() %>% + body_add_par("Cost-effectiveness acceptability frontiers",style = "heading 2") %>% + body_add_par( + paste0("Cost-effectiveness acceptability frontiers are presented for all non-dominated treatments for each of the risk groups")) %>% + body_add_break() %>% + body_add_figure_legend( + legend = paste0("Cost-effectiveness acceptability frontier – all risk"), + bookmark = "fig2") %>% + body_add_plot(print(res$weighted_incremental$pop_1$p), height = 4) %>% + body_add_par( + paste0("Abbreviations: QALYs, quality-adjusted life-years"), style = "Table footnote") + + doc_res <- doc_res %>% + body_add_figure_legend( + legend = paste0("Cost-effectiveness acceptability frontier – favourable risk"), + bookmark = "fig3") %>% + body_add_plot(print(res$weighted_incremental$pop_2$p), height = 4) %>% + body_add_par( + paste0("Abbreviations: QALYs, quality-adjusted life-years"), style = "Table footnote") + + doc_res <- doc_res %>% + body_add_figure_legend( + legend = paste0("Cost-effectiveness acceptability frontier – intermediate / poor risk"), + bookmark = "fig4") %>% + body_add_plot(print(res$weighted_incremental$pop_3$p), height = 4) %>% + body_add_par( + paste0("Abbreviations: QALYs, quality-adjusted life-years"), style = "Table footnote") %>% + + body_add_break() + + doc_res <- body_end_section_portrait(doc_res) + + + doc_res <- doc_res %>% + body_add_par("Scenario analysis style tables",style = "heading 1") %>% + body_add_table_legend( + legend = paste0("Scenario analysis - all risk"), + bookmark = "tab11") %>% + body_add_flextable(Scenario_table_allrisk , + align = "left", + topcaption = TRUE, + split = TRUE) %>% + + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend( + legend = paste0("Scenario analysis - favourable risk"), + bookmark = "tab12") %>% + body_add_flextable(Scenario_table_favrisk , + align = "left", + topcaption = TRUE, + split = TRUE) %>% + + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend( + legend = paste0("Scenario analysis - intermediate / poor risk"), + bookmark = "tab13") %>% + body_add_flextable(Scenario_table_IPrisk , + align = "left", + topcaption = TRUE, + split = TRUE) %>% + + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend( + legend = paste0("Scenario analysis pairwise comparison table"), + bookmark = "tab14") %>% + body_add_flextable(ft_all_pairwise_tab , + align = "left", + topcaption = TRUE, + split = TRUE) %>% + + body_add_break() + + } else { + + # Producing report tables (PartSA) ------------------------------------------------------ + + + # Make LY table + + PartSA_Lys <- do.call(rbind,lapply(structure(names(res$ly),.Names=names(res$ly)), function(popu_txt) { + + lu_pop <- p$basic$lookup$pop_map + lu_mol <- p$basic$lookup$ipd$mol + + popu <- as.data.table(res$ly[[popu_txt]]) + popu_n <- as.numeric(gsub("pop_","",popu_txt)) + + # seq_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Sequencing.population + rsk_popu_lab <- rep(lu_pop[Overall.population.number == popu_n,]$Risk.population,nrow(popu)) + popu <- data.table(L1 = rownames(res$ly[[popu_txt]]), popu) + popu$L1 <- lu_mol[match(popu$L1,lu_mol$RCC_input_desc),]$Description + + # popu$seq_pop <- seq_popu_lab + popu <- cbind(popu, risk_pop=rsk_popu_lab) + + return(popu) + + })) + + setDT(PartSA_Lys)[, risk_pop := str_replace(risk_pop, "Int/poor", "Intermediate / poor risk")] + PartSA_Lys$Total <- rowSums(PartSA_Lys[,2:5]) + + PartSA_LYs_table <- ff_PartSALY_table(PartSA_Lys) + + # Make QALYs table + + PartSA_QALYs <- do.call(rbind,lapply(structure(names(res$disc_qaly),.Names=names(res$disc_qaly)), function(popu_txt) { + + lu_pop <- p$basic$lookup$pop_map + lu_mol <- p$basic$lookup$ipd$mol + + popu <- as.data.table(res$disc_qaly[[popu_txt]]) + popu_n <- as.numeric(gsub("pop_","",popu_txt)) + + # seq_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Sequencing.population + rsk_popu_lab <- rep(lu_pop[Overall.population.number == popu_n,]$Risk.population,nrow(popu)) + popu <- data.table(L1 = rownames(res$disc_qaly[[popu_txt]]), popu) + popu$L1 <- lu_mol[match(popu$L1,lu_mol$RCC_input_desc),]$Description + + # popu$seq_pop <- seq_popu_lab + popu <- cbind(popu, risk_pop=rsk_popu_lab) + + return(popu) + + })) + + setDT(PartSA_QALYs)[, risk_pop := str_replace(risk_pop, "Int/poor", "Intermediate / poor risk")] + PartSA_QALYs$Total <- rowSums(PartSA_QALYs[,2:5]) + + PartSA_QALYs_table <- ff_PartSAQALY_table(PartSA_QALYs) + + # Make costs table + + PartSA_costs <- do.call(rbind,lapply(structure(names(res$disc_cost),.Names=names(res$disc_cost)), function(popu_txt) { + + lu_pop <- p$basic$lookup$pop_map + lu_mol <- p$basic$lookup$ipd$mol + + popu <- as.data.table(res$disc_cost[[popu_txt]]) + popu_n <- as.numeric(gsub("pop_","",popu_txt)) + + # seq_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Sequencing.population + rsk_popu_lab <- rep(lu_pop[Overall.population.number == popu_n,]$Risk.population,nrow(popu)) + popu <- data.table(L1 = rownames(res$disc_cost[[popu_txt]]), popu) + popu$L1 <- lu_mol[match(popu$L1,lu_mol$RCC_input_desc),]$Description + + # popu$seq_pop <- seq_popu_lab + popu <- cbind(popu, risk_pop=rsk_popu_lab) + + return(popu) + + })) + + setDT(PartSA_costs)[, risk_pop := str_replace(risk_pop, "Int/poor", "Intermediate / poor risk")] + PartSA_costs$Total <- rowSums(PartSA_costs[,2:11]) + + PartSA_costs_table <- ff_PartSAcost_table (PartSA_costs) + + # Make results table + + PartSA_wa <- do.call(rbind,lapply(structure(names(res$incremental),.Names=names(res$incremental)), function(popu_txt) { + + lu_pop <- p$basic$lookup$pop_map + lu_mol <- p$basic$lookup$ipd$mol + + if (is.null(res$incremental[[popu_txt]]$non_dominated)) { + popu <- as.data.table(res$incremental[[popu_txt]]) + popu <- data.table(popu, ic = 0, iq = 0, il = 0, ICER = "Dominant" ) + popu$str_dom <- NULL + + } else { + popu <- as.data.table(res$incremental[[popu_txt]]$expanded_results) + popu$ICER[popu$extdom==FALSE] <- as.character(paste0("£",round(popu$ICER[popu$extdom==FALSE] ,0))) + popu$ICER[popu$extdom==TRUE] <- "(ext dominated)" + popu$str_dom <- NULL + popu$extdom <- NULL + popu$r <- NULL + + } + + popu_n <- as.numeric(gsub("pop_","",popu_txt)) + + # seq_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Sequencing.population + rsk_popu_lab <- rep(lu_pop[Overall.population.number == popu_n,]$Risk.population,nrow(popu)) + + + # popu$seq_pop <- seq_popu_lab + popu <- cbind(popu, risk_pop=rsk_popu_lab) + + return(popu) + + })) + + PartSA_wa <- PartSA_wa[,c(2,3,1,4,5,7,6,8,9)] + + + + PartSA_totals <- do.call(rbind,lapply(structure(names(res$tables$top_line),.Names=names(res$tables$top_line)), function(popu_txt) { + + lu_pop <- p$basic$lookup$pop_map + lu_mol <- p$basic$lookup$ipd$mol + + popu <- as.data.table(res$tables$top_line[[popu_txt]]) + popu_n <- as.numeric(gsub("pop_","",popu_txt)) + + # seq_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Sequencing.population + rsk_popu_lab <- rep(lu_pop[Overall.population.number == popu_n,]$Risk.population,nrow(popu)) + popu$L1 <- lu_mol[match(popu$L1,lu_mol$Number),]$Description + + # popu$seq_pop <- seq_popu_lab + popu <- cbind(popu, risk_pop=rsk_popu_lab) + + return(popu) + + })) + + PartSA_totals <- PartSA_totals[order( risk_pop, costs)] # order by increasing costs + + PartSA_totals$L1_risk <- paste(PartSA_totals$L1, PartSA_totals$risk_pop) + + PartSA_wa$L1_risk <- paste(PartSA_wa$L1, PartSA_wa$risk_pop) + + PartSA_results <- merge(PartSA_totals, PartSA_wa, all.x = TRUE) + PartSA_results[is.na(ICER)]$ICER <- "(dominated)" + + PartSA_results <- PartSA_results[,c(4,1,3,2,7,8,9,10,5)] + + PartSA_results <- PartSA_results[order( risk_pop, costs)] # order by increasing costs + + + PartSA_Pairwise <- do.call(rbind,lapply(structure(names(res$tables$top_line),.Names=names(res$tables$top_line)), function(popu_txt) { + + lu_pop <- p$basic$lookup$pop_map + + popu <- f_res_ICER_pairwiseVsoneTrt(res$tables$top_line[[popu_txt]], 1, p$basic$lookup$ipd$mol) + popu_n <- as.numeric(gsub("pop_","",popu_txt)) + + # seq_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Sequencing.population + rsk_popu_lab <- rep(lu_pop[Overall.population.number == popu_n,]$Risk.population,nrow(popu)) + + # popu$seq_pop <- seq_popu_lab + popu <- cbind(popu, risk_pop=rsk_popu_lab) + + return(popu) + + })) + + + PartSA_Pairwise$Pairwise_ICER[is.na(PartSA_Pairwise$icer) != TRUE] <- as.character(paste0("£",round(PartSA_Pairwise$icer[is.na(PartSA_Pairwise$icer) != TRUE] ,0))) + + + PartSA_Pairwise[PartSA_Pairwise$icer < 0 & PartSA_Pairwise$iq < 0]$Pairwise_ICER <- "Cabo+nivo dominated" + PartSA_Pairwise[PartSA_Pairwise$icer < 0 & PartSA_Pairwise$iq > 0]$Pairwise_ICER <- "Cabo+nivo dominant" + PartSA_Pairwise[PartSA_Pairwise$icer > 0 & PartSA_Pairwise$iq < 0]$Pairwise_ICER <- paste0("SW quadrant ",PartSA_Pairwise[PartSA_Pairwise$icer > 0 & PartSA_Pairwise$iq < 0]$Pairwise_ICER) + + PartSA_Pairwise_Scen <- PartSA_Pairwise + + PartSA_Pairwise <- PartSA_Pairwise[,c(4,9,10)] + + + PartSA_results <- merge(PartSA_results, PartSA_Pairwise, all.x = TRUE) + + PartSA_results <- PartSA_results[,c(1:8,10,9)] + + PartSA_results[ICER == 0]$ICER <- "(dominated)" + PartSA_results[,6] <- as.numeric(unlist(PartSA_results[,6][[1]])) + PartSA_results[,7] <- as.numeric(unlist(PartSA_results[,7][[1]])) + PartSA_results[,8] <- as.numeric(unlist(PartSA_results[,8][[1]])) + + PartSA_results<- PartSA_results[order( risk_pop, costs)] # order by increasing costs + setDT(PartSA_results)[, risk_pop := str_replace(risk_pop, "Int/poor", "Intermediate / poor risk")] + + PartSA_results_tab <-ff_PartSAresults_table(PartSA_results) + + #### Scenario analysis tables + + comparator_no_allrisk <- ff_closest_comparator_PartSA(res,"pop_1") + comparator_no_favrisk <- ff_closest_comparator_PartSA(res,"pop_2") + comparator_no_IPrisk <- ff_closest_comparator_PartSA(res,"pop_3") + + + # all risk + + Scenario_table <- ff_scenario_output(res, Scenario_name, comparator_no_allrisk, "pop_1", p$basic$structure) + + Scenario_table_allrisk <- ff_scenario_table(Scenario_table) + + # favourable risk + + Scenario_table <- ff_scenario_output(res, Scenario_name, comparator_no_favrisk, "pop_2", p$basic$structure) + + Scenario_table_favrisk <- ff_scenario_table(Scenario_table) + + # int/poor risk + + Scenario_table <- ff_scenario_output(res, Scenario_name, comparator_no_IPrisk, "pop_3", p$basic$structure) + + Scenario_table_IPrisk <- ff_scenario_table(Scenario_table) + + # Scenario analysis pairwise results + + setDT(PartSA_Pairwise_Scen)[, risk_pop := str_replace(risk_pop, "Int/poor", "Intermediate / poor risk")] + + PartSA_Pairwise_Scen <- PartSA_Pairwise_Scen[, c(4,1,2,3,5,6,7,10,9)] + PartSA_Pairwise_Scen$ICER <- PartSA_Pairwise_Scen$Pairwise_ICER + PartSA_Pairwise_Scen$Pairwise_ICER <- NULL + + ft_all_pairwise_tab <- ff_scenario_pairwise_table(PartSA_Pairwise_Scen ) + + + # Outputting report (PartSA) ------------------------------------------------------ + doc_res <- doc_res %>% + body_add_table_legend(paste0("PartSA life years"), + bookmark = "tab1") %>% + body_add_flextable(PartSA_LYs_table, + align = "left", + topcaption = TRUE, + split = TRUE) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend(paste0("PartSA QALYs"), + bookmark = "tab2") %>% + body_add_flextable(PartSA_QALYs_table, + align = "left", + topcaption = TRUE, + split = TRUE) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend(paste0("PartSA costs"), + bookmark = "tab3") %>% + body_add_flextable(PartSA_costs_table, + align = "left", + topcaption = TRUE, + split = TRUE) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend(paste0("PartSA results (ordered in increasing cost)"), + bookmark = "tab4") %>% + body_add_flextable(PartSA_results_tab, + align = "left", + topcaption = TRUE, + split = TRUE) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_par("Scenario analysis style table",style = "heading 1") %>% + body_add_table_legend( + legend = paste0("Scenario analysis style table - all risk"), + bookmark = "tab5") %>% + body_add_flextable(Scenario_table_allrisk , + align = "left", + topcaption = TRUE, + split = TRUE) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend( + legend = paste0("Scenario analysis style table - favourable risk"), + bookmark = "tab6") %>% + body_add_flextable(Scenario_table_favrisk , + align = "left", + topcaption = TRUE, + split = TRUE) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend( + legend = paste0("Scenario analysis style table - intermediate / poor risk"), + bookmark = "tab7") %>% + body_add_flextable(Scenario_table_IPrisk , + align = "left", + topcaption = TRUE, + split = TRUE) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend( + legend = paste0("Scenario analysis pairwise comparison table"), + bookmark = "tab8") %>% + body_add_flextable(ft_all_pairwise_tab , + align = "left", + topcaption = TRUE, + split = TRUE) + + + } + + + print(doc_res, target = paste0("./4_Output/Scenario ",Scenario_number,"_",i$dd_drug_price_options,gsub(":","_",Run_date),".docx")) + rm(doc_res) + + + } + + +} + +# END OF CODE ------------------------------------------------------- + + +#### A note on planned model changes +# This model is currently undergoing internal and external QC via the NICE DSU +# The following changes are planned to the code following technical engagement: +# - Incorporation of PSA +# - Incorporation of functionality to allow scenario analyses around the impact of prior adjuvant IO treatment + + +#### Additional changes will be made during Phase 2 of this pilot following use for the initial decision problem including +# - Addition of Shiny user interface +# - Genericisation of the code to allow wider use +# - Programming and analysis of model outputs related specifically to sequencing, this may include value of information analyses + + diff --git a/2_Scripts/standalone scripts/results processing/re-draw CEAC axes.R b/2_Scripts/standalone scripts/results processing/re-draw CEAC axes.R new file mode 100644 index 0000000..4fc6886 --- /dev/null +++ b/2_Scripts/standalone scripts/results processing/re-draw CEAC axes.R @@ -0,0 +1,37 @@ +### This code produces CEAFs with the cost axes correctly outputting following HPC run issues with encoding + +# Set population + +popu <- "pop_3" + +# Set nsdom and reduced_table for the population + +nsddom <- res$weighted_incremental[[popu]]$not_strictly_dominated +reduced_table <- res$weighted_incremental[[popu]]$expanded_results[extdom == FALSE,] + +# run plot + +ggplot(nsddom, aes(x = qalys, y = costs, colour = as.factor(L1), label = r)) + + geom_point() + + theme_classic() + + # ggrepel::geom_text_repel(max.overlaps = 100, alpha = 0.2) + + geom_line(data = reduced_table, aes(x=qalys,y=costs,colour=NULL)) + + # ggrepel::geom_label_repel( + # data = reduced_table, + # # arrow = arrow(ends = "last",type = "closed"), + # aes( + # x = qalys, + # y = costs, + # colour = NULL, + # label = as.factor(L1), + # ))+ + theme(legend.position = "bottom") + + scale_x_continuous(limits = c(0,max(nsddom$qalys)), expand = expansion(mult = c(0,0.05))) + + scale_y_continuous( + limits = c(0, max(nsddom$costs)), + expand = expansion(mult = c(0, 0.05)), + labels = label_dollar(prefix = "£") + ) + + labs(x= "QALYs", y = "Costs") + + theme(legend.title = element_blank()) + diff --git a/3_Functions/1_Data/elsnumalph.csl b/3_Functions/1_Data/elsnumalph.csl new file mode 100644 index 0000000..8d35a0a --- /dev/null +++ b/3_Functions/1_Data/elsnumalph.csl @@ -0,0 +1,134 @@ + + diff --git a/3_Functions/1_Data/elsvan.csl b/3_Functions/1_Data/elsvan.csl new file mode 100644 index 0000000..be85b6d --- /dev/null +++ b/3_Functions/1_Data/elsvan.csl @@ -0,0 +1,163 @@ + + diff --git a/3_Functions/Readme.Rmd b/3_Functions/Readme.Rmd new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/3_Functions/Readme.Rmd @@ -0,0 +1 @@ + diff --git a/3_Functions/adverse_events/AE_steps.R b/3_Functions/adverse_events/AE_steps.R new file mode 100644 index 0000000..e4c691a --- /dev/null +++ b/3_Functions/adverse_events/AE_steps.R @@ -0,0 +1,94 @@ +# adverse event coding + +#' Calculate expected cost per patient per week cost and QALY penalty by treatment +#' +#' @param AE_costs Should be i$R_table_AE_costs +#' @param AE_disutil Should be i$R_table_AE_util +#' @param AE_duration Should be i$R_table_duration +#' @param AE_rate Should be i$R_table_AE_rates +#' @param comparators Should be i$lookup$trt +#' @param weeks_per_year Number of weeks per year, e.g., calculated as p$basic$cl_w / p$basic$cl_y +#' @param PSA Sample from PSA distribution (TRUE/FALSE) +#' +#' @details This function takes the individual inputs for adverse events and processes +#' them into a format that fits in with the way information is organised +#' throughout the rest of the model structure. This uses `data.table` syntax, +#' so some practice/experience with that package is recommended before +#' conducting a review. +#' +#' To test this function: The recommended way to check functionality would +#' be to load an input excel file within `Model_structure.R`, running that +#' code up to the point at which `f_process_adverse_events` is called. +#' One can then use the values from `i` and `p` which populate the +#' arguments to the function, assigning each one in order to then +#' go line by line through the below code. This will enable the user +#' to replicate the calculations from within this function side-by-side +#' in excel to check that they are correct. +#' +#' +f_process_adverse_events <- function(AE_costs, AE_disutil, AE_duration, AE_rate, comparators, weeks_per_year, PSA) { + + AE_rate <- data.table(AE_rate) + AE_rate[, duration_weeks := NULL] # Delete as not used in this function and risk of confusion + + if (PSA == FALSE) { + AE_cost_QALYs_per_episode <- merge( + merge( + AE_duration[c("AE", "Duration_Mean")], + AE_disutil[c("AE", "Disutil_mean")], + all = TRUE, + by = "AE"), + AE_costs[c("AE", "Cost.per.event_Mean")], + all = TRUE, + by = "AE" + ) + colnames(AE_cost_QALYs_per_episode) <- c("AE", "duration", "utility", "cost") + + # AE durations are given in weeks, but QALYs are given in years + AE_cost_QALYs_per_episode$QALYs <- with(AE_cost_QALYs_per_episode, utility * duration / weeks_per_year) + + # Estimate AE rates for each line of each treatment + all_combinations <- CJ( + Treatment.name = names(comparators), + Treatment.line = 1:4, + AE = unique(AE_costs$AE) + ) + all_combinations[, Molecule := comparators[Treatment.name]] + AE_rates_expanded <- AE_rate[ + all_combinations, + on = .(AE = AE, Molecule = Molecule, Treatment.name = Treatment.name, Treatment.line = Treatment.line) + ] + setorder(AE_rates_expanded, Molecule, AE, Treatment.line) + + AE_rates_expanded[, Rate.per.patient.per.week := nafill(Rate.per.patient.per.week, "locf"), by = .(Molecule, AE)] + AE_rates_expanded[, Rate.per.patient.per.week := nafill(Rate.per.patient.per.week, "nocb"), by = .(Molecule, AE)] + AE_rates_expanded[, Rate.per.patient.per.week := nafill(Rate.per.patient.per.week, "const", fill = 0.0), by = .(Molecule, AE)] + + # Join the costs and QALYs per episode + AE_cost_QALYs_by_treatment_and_AE <- AE_rates_expanded[ + AE_cost_QALYs_per_episode[c("AE", "cost", "QALYs")], + on = .(AE = AE) + ] + + # Weight by rate and sum for the result + result <- AE_cost_QALYs_by_treatment_and_AE[ + , + .(cost = sum(Rate.per.patient.per.week * cost), QALYs = sum(Rate.per.patient.per.week * QALYs)), + by = .(Treatment.name, Molecule, Treatment.line) + ] + setorder(result, Treatment.line, Molecule) + result[, Molecule := NULL] + setnames(result, c("trt", "line", "cost", "QALYs")) + setDF(result) + + # Print a helpful message and return + cat("Mean cost and QALY impact due to AEs per patient per week:\n") + # print(result) + return(result) + + } else { + #code to sample iterations times from distributions of duration, cost and utility decrements + stop("PSA sampling for AE costs and QALYs not implemented") + } + +} \ No newline at end of file diff --git a/3_Functions/costs/costs.R b/3_Functions/costs/costs.R new file mode 100644 index 0000000..4030fd3 --- /dev/null +++ b/3_Functions/costs/costs.R @@ -0,0 +1,122 @@ +#' Function to "clean" the drug cost input table from excel. This function is +#' not currently used, but may be adapted in the future. +#' +#' @param drug_cost_table the input table from excel for drug costs. +#' +f_clean_drug <- function(drug_cost_table) { + + tab <- as.data.table(drug_cost_table) + + # Now cycle through the individual treatments using the id of trt for each one, + # retaining their grouping but separating different combination regimen - + # + # NOTE THAT THIS MEANS THAT THE SAME MOLECULE WILL BE IN THIS TABLE MULTIPLE + # TIMES, ASSOCIATED WITH DIFFERENT COMBINATIONS. + # + # This actually minimises repetition and allows for different formulations of + # the same chemical (like flat-dose vs weight-based nivo) + + mols <- paste0("mol_",unique(tab$Molecule)) + + + trt_list <- lapply(unique(tab$Molecule), function(trt) { + + # Filter down to this "Molecule", i.e. regimen (or reg for short) + reg <- tab[Molecule == trt,] + + # Record the number of components + n_comp <- nrow(reg[!is.na(reg$Drug.cost.per.dose),]) + comp_id <- reg$Treatment.name[!is.na(reg$Drug.cost.per.dose)] + names(comp_id) <- comp_id + + # Each component needs its own list entry so that different regimen can + # have different numbers of combinates and the resulting data don't need + # any special code for individual regimen. Means that future expansions + # require no further work + + component_list <- lapply(comp_id, function(combinate) { + list( + type = reg[Drug.name == combinate, Per.cycle.or.one.off.cost], + cpd = reg[Drug.name == combinate, Drug.cost.per.dose], + acpd = reg[Drug.name == combinate, Admin.cost.per.admin], + acpd_se = reg[Drug.name == combinate, Admin.cost.SE], + dos_freq = reg[Drug.name == combinate, Applied.every.x.cycles], + dos_quant = reg[Drug.name == combinate, Number.of.doses.when.applied], + rdi = reg[Drug.name == combinate, RDI], + rdi_se = reg[Drug.name == combinate, RDI.SE], + start = reg[Drug.name == combinate, Time.from], + stop = reg[Drug.name == combinate, Time.to..cycles.] + ) + }) + return(component_list) + + }) + names(trt_list) <- mols + return(trt_list) +} + +#' This function calculates the components for each part of the drug and admin costs. NOT CURRENTLY USED +#' +#' @details This function was used in a previous version of the model, and is no +#' longer applied. +#' +f_drug_calcComponentCost <- function(health_state, TH, multi = FALSE, n = NULL) { + + # Take into account whether iterative or not: + if (multi) { + q <- drug_i$dos_quant[n] + f <- drug_i$dos_freq[n] + r <- drug_i$rdi[n] + if (is.na(drug_i$cpd)) { + c <- 0 + } else { + c <- drug_i$cpd[n] * drug_i$rdi[n] # note some drug costs vary via eMIT + } + ac <- drug_i$acpd[n] + } else { + q <- drug_i$dos_quant + f <- drug_i$dos_freq + r <- drug_i$rdi + if (is.na(drug_i$cpd)) { + c <- 0 + } else { + c <- drug_i$cpd * drug_i$rdi + } + ac <- drug_i$acpd + } + + if (drug_i$type == "One off") { + sched <- c(1,rep(0,TH-1)) + } else { + # Calculate the dosing schedule over time: + sched <- rep(c(q,rep(0,f - 1)),(TH + 1) / floor(f))[1:TH] + + # Add min start duration if it's active - THIS IS ABSOLUTE TIME at the moment which will need updating + if (drug_i$start > 0) { + sched[1:drug_i$start] <- 0 + } + # Add max treatment duration if it's active + if (drug_i$stop < TH) { + sched[drug_i$stop:TH] <- 0 + } + } + + # Multiply the dosing schedule by the drug cost per dose, and multiply that by RDI + if (c == 0) { + c_dr <- rep(0,TH) + } else { + c_dr <- sched * c + } + if (ac == 0) { + c_ad <- rep(0,TH) + } else { + c_ad <- sched * ac + } + + # Return the cost per cycle + return(list( + schedule = sched, + drug = c_dr, + admin = c_ad + )) +} \ No newline at end of file diff --git a/3_Functions/costs_and_QALYs/cost_processing.R b/3_Functions/costs_and_QALYs/cost_processing.R new file mode 100644 index 0000000..28b86b2 --- /dev/null +++ b/3_Functions/costs_and_QALYs/cost_processing.R @@ -0,0 +1,733 @@ +# Code to calculate costs + +#' manipulate costs data pulled from inputs spreadsheet outputs vector of drug +#' costs by week for each molecule where the column number of each matrix +#' corresponds to the cycle px starts drug at +#' +#' @param drug_and_admin Excel named range `R_table_drug_admin_costs` from the inputs file +#' @param per_cycle_costs Excel named range `R_table_MRU` from the inputs file +#' @param one_off_costs Excel named range `R_table_MRU_oneoff` from the inputs file +#' @param time_horizon Time horizon from `p$basic$th` from the inputs file +#' @param max_trt_lines Maximum number of lines the R model can handle from `p$basic$R_maxlines` +#' @param RDI_source Excel named range `dd_sc_RDI` from the inputs file +#' @param PSA NOT IMPLEMENTED YET +#' @param verbose Set to TRUE for more console output to track issues +#' +#' @details Uses the inputs tables directly from the excel input file to populate +#' drug and admin costs (note that AE costs are done separately) in a +#' format that is conducive to the way that relative efficacy and the +#' overall model structure is organised. The output is organised by: +#' +#' - category (drug, admin, mru_on, mru_off) +#' - molecule +#' - treatment line +#' +#' Allowing for much flexibility in the application of treatment costs. +#' Each bottom-level element is a vector of values from time zero to +#' the time horizon (as determined by `time_horizon`). This means that +#' in a probabilistic setting this full function would be used to generate +#' `n_psa` "sets" of values. Similarly, in the scenario analysis a different +#' excel file is loaded each time, which then leads to the inputs being +#' different if affected by that scenario. +#' +f_process_cost_data <- function(drug_and_admin, + per_cycle_costs, + time_horizon, + RDI_source, + max_trt_lines, + PSA = FALSE, + samples = NULL, + verbose = FALSE) { + + # Step one - tidying up the tables: + + # cleaning raw_tables + drug_and_admin_raw <- data.table(drug_and_admin ) + drug_and_admin_raw[is.na(drug_and_admin_raw)] <- 0 + + # Populating RDI by line, will give an error message if the line does not exist in Excel + if (RDI_source == "RDI per RWE") { + rdi_table <- drug_and_admin_raw[,c("Treatment.name","Molecule","Type",paste0("RDI.Mean_RWE_",1:max_trt_lines,"L")), with=FALSE] + setnames(rdi_table, c(paste0("RDI.Mean_RWE_",1:max_trt_lines,"L")),paste0("RDI_line_",1:max_trt_lines)) + } else if (RDI_source == "Set all RDI to 100%") { + rdi_table <- as.list(drug_and_admin_raw[,c("Treatment.name","Molecule","Type"), with=FALSE]) + RDI_1 <- lapply(1:max_trt_lines, function(x) rep(1,nrow(drug_and_admin))) + names(RDI_1) <- paste0("RDI_line_",1:max_trt_lines) + rdi_table <- as.data.table(c(rdi_table,RDI_1)) + } else { + rdi_table <- drug_and_admin_raw[,c("Treatment.name","Molecule","Type",paste0("RDI.Mean_Trial_",1:max_trt_lines,"L")), with=FALSE] + setnames(rdi_table, c(paste0("RDI.Mean_Trial_",1:max_trt_lines,"L")),paste0("RDI_line_",1:max_trt_lines)) + } + + # RDI is used to calculate drug and admin costs and represents missed doses whilst on treatment for which no drug or admin charge applies + + if(PSA == TRUE) { + + # Standard errors for relative dose intensity depending on the option the user has selected + if (RDI_source == "RDI per RWE") { + rdi_SE_table <- drug_and_admin_raw[,c("Treatment.name","Molecule","Type",paste0("RDI.SE_RWE_",1:max_trt_lines,"L")), with=FALSE] + setnames(rdi_SE_table, c(paste0("RDI.SE_RWE_",1:max_trt_lines,"L")),paste0("RDI_SE_line_",1:max_trt_lines)) + } else if (RDI_source == "Set all RDI to 100%") { + rdi_SE_table <- as.list(drug_and_admin_raw[,c("Treatment.name","Molecule","Type"), with=FALSE]) + RDI_0 <- lapply(1:max_trt_lines, function(x) rep(0,nrow(drug_and_admin))) + names(RDI_0) <- paste0("RDI_SE_line_",1:max_trt_lines) + rdi_SE_table <- as.data.table(c(rdi_SE_table,RDI_0)) + } else { + rdi_SE_table <- drug_and_admin_raw[,c("Treatment.name","Molecule","Type",paste0("RDI.SE_Trial_",1:max_trt_lines,"L")), with=FALSE] + setnames(rdi_SE_table, c(paste0("RDI.SE_Trial_",1:max_trt_lines,"L")),paste0("RDI_SE_line_",1:max_trt_lines)) + } + + # Merge the mean and SE together into one table irrespective of the option selected + rdi_table_se <- merge.data.table(rdi_table,rdi_SE_table) + + # cycle row by row creating draws, using common RNG + rdi_random_gen <- runif(samples) + + # generate samples for RDI using the set random numbers generated above + RDI_samples <- rbindlist(lapply(1:nrow(rdi_table_se), function(row_in_table) { + + dat <- rdi_table_se[row_in_table,] + + id <- dat[,list(Treatment.name,Molecule,Type)] + id <- rbindlist(lapply(1:samples, function(x) id)) + + mean_nams <- paste0("RDI_line_",1:max_trt_lines) + se_nams <- paste0("RDI_SE_line_",1:max_trt_lines) + + names(mean_nams) <- mean_nams + names(se_nams) <- se_nams + + # Compile a data.table which has 1 row per iteration with RDI for that iteration, + # which also takes into account if mean is down as 1 or SE is 0 + data.table( + id, + as.data.table(lapply(structure(1:max_trt_lines,.Names=mean_nams), function(trt_line) { + + # Estimate alpha and beta parameters for a beta distribution for this treatment + # line's RDI, then draw samples times from there using random numbs: + mn <- dat[[mean_nams[trt_line]]] + se <- dat[[se_nams[trt_line]]] + + if (mn < 1 & se > 0) { + params <- estBetaParams(mn, se ^ 2) + qbeta(rdi_random_gen, .subset2(params,"alpha"), .subset2(params,"beta")) + } else if (mn == 1 & se > 0) { + params <- estBetaParams(0.999, se ^ 2) + qbeta(rdi_random_gen, .subset2(params,"alpha"), .subset2(params,"beta")) + } else if (se == 0) { + rep(mn,samples) + } else { + rep(1,samples) + } + })), + iteration = 1:samples + ) + })) + + rm(rdi_table, rdi_table_se, rdi_random_gen) + + # This can now be used to generate and merge with the full tables: + + + # Admin costs - similarly to the rdi table, we pull the right columns and then + # generate them so we can merge it. note that "RDI" is applied to admin costs + admin_table <- + drug_and_admin_raw[, c( + "Treatment.name", + "Molecule", + "Type", + "Admin.cost..per.administration..Mean", + "Admin.cost..per.administration..SE"), + with = FALSE] + + setnames(admin_table, c("Admin.cost..per.administration..Mean", "Admin.cost..per.administration..SE"), c("mean","se")) + + admin_samples <- rbindlist(lapply(1:nrow(admin_table), function(row_in_table) { + + dat <- admin_table[row_in_table,] + + id <- dat[,list(Treatment.name,Molecule,Type)] + id <- rbindlist(lapply(1:samples, function(x) id)) + + # Call it the same name that is used in the deterministic model: + id$Admin.cost..per.administration..Mean <- rnorm(samples,mean = dat$mean,sd = dat$se) + id$iteration <- 1:samples + return(id) + })) + + + # Finally, drug costs. Some of these may have parameter uncertainty in the future + # because they come from eMIT or are generics, which have variance in pricing + + drug_table <- + drug_and_admin_raw[, c( + "Treatment.name", + "Molecule", + "Type", + "Drug.cost.per.dose", + "Applied.every.x.cycles", + "Number.of.doses.when.applied", + "Time.from", + "Time.to..cycles." + ), + with = FALSE] + + drug_table <- rbindlist(lapply(1:samples, function(psa_iteration) { + drug_table$iteration <- psa_iteration + return(drug_table) + })) + + + # Now that we have all of the components, we can simply merge them all + # since they all have id columns that are the same + + drug_and_admin <- merge.data.table(merge.data.table(drug_table,RDI_samples),admin_samples) + + # per cycle costs table is messy because the named range in excel doesn't include + # the top header row (because there are 2. This breaks the function.) + pcc <- as.list(as.data.table(per_cycle_costs[2:nrow(per_cycle_costs),])) + pcc_nam <- names(pcc) + pcc <- as.data.table(lapply(1:length(pcc), function(column_index) { + if (column_index %in% c(1,2)) { + return(pcc[[column_index]]) + } else { + return(as.numeric(pcc[[column_index]])) + } + })) + names(pcc) <- pcc_nam + rm(pcc_nam) + setnames(pcc,"Cost","mean") + setnames(pcc,"X5","SE") + + # Now perform RNG on per-cycle costs. In the lines that follow using pcc + # the table will be filtered down by type, molecule and iteration + + pcc <- rbindlist(lapply(1:nrow(pcc), function(row_in_table) { + dat <- pcc[row_in_table,] + + id <- dat[,list(Type.of.cost,Type,Molecule,Time.from..cycle.,Time.to..cycles.)] + pcc_m <- dat$mean + pcc_se <- dat$SE + id <- rbindlist(lapply(1:samples, function(x) id)) + id$mean <- rnorm(samples,pcc_m,pcc_se) + id$iteration <- 1:samples + return(id) + })) + + + + # Produce an empty cost vector to use as a starting point for all components + # (we can re-use it this way): + cost <- numeric(time_horizon + 1) + + # create a list with $mol = molecule and $data = vector of per-cycle costs + + line_labs <- structure(1:max_trt_lines,.Names=paste0("line_",1:max_trt_lines)) + mol_labs <- structure(unique(drug_and_admin$Molecule),.Names=paste0("mol_",unique(drug_and_admin$Molecule))) + + # For the PSA version of the function, we wrap the whole process for the deterministic + # function in a lapply - one for each PSA iteration! + + # one-off costs are actually not part of this function! + # + # + # Now, the PSA version is just like the deterministic version but we filter the + # table by PSA iteration AND line and molecule. + # + # the main model function for the probabilistic version for a specific PSA iteration + # then instead of pulling out p$costs$mk pulls ONE PSA ITERATION from p_psa$costs + # thereby slotting in those inputs with the same format they have in the + # deterministic model. This means the results are compatible with the lambda + # approximation method AND ALSO doing the "full-blown" PSA (i.e. full expanded + # TRACE calculations) + # + # + # generate samples structural clones of the cost structure used for deterministic with + # probabilistic values + # + # NOTE that although this uses multicore, it DOES NOT include any of the RNG. + # This preserves seeds and avoids issues without loss of speed. + # + s <- 1:samples + return(with_progress({ + prog <- progressr::progressor(along=s) + # future_lapply(s, function(psa_iteration) { + lapply(s, function(psa_iteration) { + + prog(paste0("PSA | cost inputs #",psa_iteration)) + + # note that these only ever apply to on treatment states: + da_cost_vecs <- lapply(line_labs, function(tx_line) { + lapply(mol_labs, function(mol) { + + if (mol == 999) { + return(list( + drug = drug, + admin = admin + )) + } + + # make a message optionally + if (verbose) cat(paste0("Drug costs: line_",tx_line, "$mol_",mol,"\n")) + + # pull out the data and the correct RDI for this treatment line + data <- drug_and_admin[Molecule == mol & iteration == psa_iteration,] + data$RDI.Mean <- data[[paste0("RDI_line_",tx_line)]] + + # We don't have any component identifier (i.e. a number for component) + # We also don't have a number for type, and type is simply lumped as per cycle or one off + # meaning that there's no way to uniquely identify the rows. + # instead we have to cycle through the components: + # + # empty vectors to populate: + drug <- cost + admin <- cost + + cost_vectors <- lapply(1:nrow(data), function(data_row) { + current_row <- data[data_row,] + stopifnot(current_row$Type %in% c("per cycle", "one-off")) + + # when costs are incurred: + if (current_row$Type == "per cycle") { + dose_at_cycles <- seq( + floor(current_row$Time.from), + ceiling(current_row$Time.to..cycles.), + by = floor(current_row$Applied.every.x.cycles) + ) + } else if (current_row$Type == "one-off") { + dose_at_cycles <- 1 + } + + # drug costs + cpd <- current_row$Drug.cost.per.dose + ndose <- current_row$Number.of.doses.when.applied + if (current_row$Type == "one-off") { + rdi <- 1 + } else { + rdi <- current_row$RDI.Mean + } + drug[dose_at_cycles] <- cpd*ndose*rdi + + + # Admin costs + cpa <- current_row$Admin.cost..per.administration..Mean + naa <- current_row$Number.of.doses.when.applied + admin[dose_at_cycles] <- cpa*naa*rdi + + # Return a list containing the 3 components of cost: + return(list( + drug = drug, + admin = admin + )) + }) + + # Cycle through our list, cumulative adding up drug costs, and cumulatively + # adding up admin costs + if(length(cost_vectors) == 1) { + # If there's only one component, give me drug and admin as 2 vectors: + return(list( + drug = cost_vectors[[1]]$drug, + admin = cost_vectors[[1]]$admin + )) + } else { + # If there's more than one component, add them up by drug and admin + Reduce( + x = 2:length(cost_vectors), + accumulate = FALSE, + init = cost_vectors[[1]], + function(prev, row_n) { + list( + drug = prev$drug + cost_vectors[[row_n]]$drug, + admin = prev$admin + cost_vectors[[row_n]]$admin + ) + } + ) + } + }) + }) + + # Drug costs and MRU work on different schedules, so we need a separate process + # for MRU. it follows the same structure though: + + mol_labs <- structure(unique(pcc$Molecule),.Names=paste0("mol_",unique(pcc$Molecule))) + + mru_vecs <- lapply(line_labs, function(tx_line) { + lapply(mol_labs, function(mol) { + if(verbose) cat(paste0("Resource use: line_", tx_line, "$mol_",mol,"\n")) + + dat <- pcc[Molecule == mol & iteration == psa_iteration,] + + onoff <- structure(c("On", "Off"), .Names = c("on", "off")) + + if(mol != 999) { + lapply(onoff, function(treatment_status) { + tx <- dat[grep(treatment_status,Type.of.cost,ignore.case = FALSE),] + + # Work out each cost vector and then add them up (reduce with `+` will + # add up a bunch of list elements to give you the result :-) + return(Reduce( + `+`, + lapply(1:nrow(tx), function(tx_row) { + inp <- tx[tx_row,] + + dose_at_cycles <- floor(seq(inp$Time.from..cycle., inp$Time.to..cycles.)) + out <- cost + out[dose_at_cycles] <- inp$mean + out + }) + )) + }) + } else { + # BSC MRU, just has 1 row + dose_at_cycles <- floor(seq(floor(dat$Time.from..cycle.), ceiling(dat$Time.to..cycles.))) + out <- cost + out[dose_at_cycles] <- dat$mean + return(list(on=out,off=cost)) + } + }) + }) + + # Return a reorganised list by category, line, molecule + return(list( + drug = lapply(da_cost_vecs, function(li) {lapply(li, function(mol) mol[["drug"]])}), + admin = lapply(da_cost_vecs, function(li) {lapply(li, function(mol) mol[["admin"]])}), + mru_on = lapply(mru_vecs, function(li) {lapply(li, function(mol) mol[["on"]])}), + mru_off = lapply(mru_vecs, function(li) {lapply(li, function(mol) mol[["off"]])}) + )) + + }) + })) + + + } else { + + # DETERMINISTIC MODEL: + + + # Subsetting to the required columns + drug_and_admin <- merge.data.table(drug_and_admin_raw[, list( + Treatment.name, + Type, + Molecule, + Drug.cost.per.dose, + Admin.cost..per.administration..Mean, + Applied.every.x.cycles, + Number.of.doses.when.applied, + Time.from, + Time.to..cycles. + )],rdi_table) + + + # per cycle costs table is messy because the named range in excel doesn't include + # the top header row (because there are 2. This breaks the function.) + pcc <- as.list(as.data.table(per_cycle_costs[2:nrow(per_cycle_costs),])) + pcc_nam <- names(pcc) + pcc <- as.data.table(lapply(1:length(pcc), function(column_index) { + if (column_index %in% c(1,2)) { + return(pcc[[column_index]]) + } else { + return(as.numeric(pcc[[column_index]])) + } + })) + names(pcc) <- pcc_nam + rm(pcc_nam) + setnames(pcc,"Cost","mean") + setnames(pcc,"X5","SE") + + + # Produce an empty cost vector to use as a starting point for all components + # (we can re-use it this way): + cost <- numeric(time_horizon + 1) + + # create a list with $mol = molecule and $data = vector of per-cycle costs + + line_labs <- structure(1:max_trt_lines,.Names=paste0("line_",1:max_trt_lines)) + mol_labs <- structure(unique(drug_and_admin$Molecule),.Names=paste0("mol_",unique(drug_and_admin$Molecule))) + + # note that these only ever apply to on treatment states: + da_cost_vecs <- lapply(line_labs, function(tx_line) { + lapply(mol_labs, function(mol) { + + # make a message optionally + if (verbose) cat(paste0("Drug costs: line_",tx_line, "$mol_",mol,"\n")) + + # pull out the data and the correct RDI for this treatment line + data <- drug_and_admin[Molecule == mol,] + data$RDI.Mean <- data[[paste0("RDI_line_",tx_line)]] + + # We don't have any component identifier (i.e. a number for component) + # We also don't have a number for type, and type is simply lumped as per cycle or one off + # meaning that there's no way to uniquely identify the rows. + # instead we have to cycle through the components: + # + # empty vectors to populate: + drug <- cost + admin <- cost + + cost_vectors <- lapply(1:nrow(data), function(data_row) { + current_row <- data[data_row,] + stopifnot(current_row$Type %in% c("per cycle", "one-off")) + + # when costs are incurred: + if (current_row$Type == "per cycle") { + dose_at_cycles <- seq( + floor(current_row$Time.from), + ceiling(current_row$Time.to..cycles.), + by = floor(current_row$Applied.every.x.cycles) + ) + } else if (current_row$Type == "one-off") { + dose_at_cycles <- 1 + } + + # drug costs + cpd <- current_row$Drug.cost.per.dose + ndose <- current_row$Number.of.doses.when.applied + if (current_row$Type == "one-off") { + rdi <- 1 + } else { + rdi <- current_row$RDI.Mean + } + drug[dose_at_cycles] <- cpd*ndose*rdi + + + # Admin costs + cpa <- current_row$Admin.cost..per.administration..Mean + naa <- current_row$Number.of.doses.when.applied + admin[dose_at_cycles] <- cpa*naa*rdi + + # Return a list containing the 3 components of cost: + return(list( + drug = drug, + admin = admin + )) + }) + + # Cycle through our list, cumulative adding up drug costs, and cumulatively + # adding up admin costs + if(length(cost_vectors) == 1) { + # If there's only one component, give me drug and admin as 2 vectors: + return(list( + drug = cost_vectors[[1]]$drug, + admin = cost_vectors[[1]]$admin + )) + } else { + # If there's more than one component, add them up by drug and admin + Reduce( + x = 2:length(cost_vectors), + accumulate = FALSE, + init = cost_vectors[[1]], + function(prev, row_n) { + list( + drug = prev$drug + cost_vectors[[row_n]]$drug, + admin = prev$admin + cost_vectors[[row_n]]$admin + ) + } + ) + } + }) + }) + + # Drug costs and MRU work on different schedules, so we need a separate process + # for MRU. it follows the same structure though: + + mol_labs <- structure(unique(pcc$Molecule),.Names=paste0("mol_",unique(pcc$Molecule))) + + mru_vecs <- lapply(line_labs, function(tx_line) { + lapply(mol_labs, function(mol) { + if(verbose) cat(paste0("Resource use: line_", tx_line, "$mol_",mol,"\n")) + + dat <- pcc[Molecule == mol,] + + onoff <- structure(c("On", "Off"), .Names = c("on", "off")) + + if(mol != 999) { + lapply(onoff, function(treatment_status) { + tx <- dat[grep(treatment_status,Type.of.cost,ignore.case = FALSE),] + + # Work out each cost vector and then add them up (reduce with `+` will + # add up a bunch of list elements to give you the result :-) + return(Reduce( + `+`, + lapply(1:nrow(tx), function(tx_row) { + inp <- tx[tx_row,] + + dose_at_cycles <- floor(seq(inp$Time.from..cycle., inp$Time.to..cycles.)) + out <- cost + out[dose_at_cycles] <- inp$mean + out + }) + )) + }) + } else { + # BSC MRU, just has 1 row + dose_at_cycles <- floor(seq(floor(dat$Time.from..cycle.), ceiling(dat$Time.to..cycles.))) + out <- cost + out[dose_at_cycles] <- dat$mean + return(list(on=out,off=cost)) + } + }) + }) + + # Return a reorganised list by category, line, molecule + return(list( + drug = lapply(da_cost_vecs, function(li) {lapply(li, function(mol) mol[["drug"]])}), + admin = lapply(da_cost_vecs, function(li) {lapply(li, function(mol) mol[["admin"]])}), + mru_on = lapply(mru_vecs, function(li) {lapply(li, function(mol) mol[["on"]])}), + mru_off = lapply(mru_vecs, function(li) {lapply(li, function(mol) mol[["off"]])}) + )) + } +} + +#' a catch-all function for "other" costs that don't fit into the usual structure. +#' +#' @param one_off_costs Named range `R_table_MRU_oneoff` from Excel input file +#' +#' +#' @details At this stage, this function only incorporates one-off costs upon +#' entering each line, to represent radiotherapy, palliative surgery, +#' and end of life costs. +#' +#' +#' +f_process_other_cost_data <- function(one_off_costs, + PSA = FALSE, + n_psa = NULL) { + #cleaning + one_off_costs <- data.table(one_off_costs) + one_off_costs <- one_off_costs[-1,] + setnames(one_off_costs,old = c("Cost","X5"),new = c("mean", "SE")) + one_off_costs[, (c("mean", "SE")) := lapply(.SD, as.numeric), .SDcols = c("mean", "SE")] + + #remove treatment initiation as this is handled in f_process_cost_data + one_off_costs <- one_off_costs[Type.of.cost != "Treatment initiation\r\n",] + + if (PSA == FALSE) { + #return mean + one_off_costs[,`:=`(SE = NULL)] + setnames(one_off_costs,old = c("mean"),new = c("cost")) + + } else { + #return a set of sampled values + stop("Code to be written") + draws <- do.call( + rbind, + lapply(1:nrow(one_off_costs), function(row_n) { + dat <- one_off_costs[row_n,] + # Normal distribution as population level (gamma tends to normal - Prof. Stevenson) + return(rnorm(n_psa,dat$mean,dat$SE)) + }) + ) + one_off_costs <- cbind(one_off_costs,draws) + } + + return(one_off_costs) +} + + +#' used to clean up some subsequent treatment data for the partitioned survival model. not used. +f_process_subs_txt_data <- function(subs_txt, PSA = FALSE){ + + #cleaning + colnames(subs_txt)[1] <- c("Treatment") + + if (PSA == FALSE) { + #return mean + subs_txt = subs_txt[,1:6] + colnames(subs_txt)[3] <- "drug_cost" + colnames(subs_txt)[4] <- "admin_cost" + colnames(subs_txt)[5] <- "AE_cost" + colnames(subs_txt)[6] <- "AE_QALY_impact" + } else { + #return a set of sampled values + stop("Code to be written") + } + + return(subs_txt) + +} + +#' Apply per cycle costs to expanded Markov trace (expanded = with tunnel states). NOT USED. +#' +#' @details A legacy function which was used during the development of the state transition model. +#' Not used in the final version, which incorporates these calculations +#' within function `f_pf_computePF_mk` directly instead. The reason for this +#' is that `f_pf_computePF_mk` is dealing with a lot of memory at once +#' and instead of copy-pasting a potentially 500MB of RAM-using expanded trace +#' (with up to 18,000 health states...) it uses that within the function, +#' calculating drug costs there. +#' +#' The idea with this function is to find the appropriate columns +#' corresponding to the on or off-treatment tunnel states for 2L+ +#' treatments, and then multiply those columns of the trace by +#' the corresponding vector of costs going from time 0 to the time horizon +#' +#' +f_apply_costs_to_trace <- function(cost_by_cycle, + AE_costs, + per_cycle_costs, + one_off_cost, + markov_trace, + seq) { + + MRU_cost_at_tx_start <- sum(one_off_cost$cost[one_off_cost$Event.to.apply == "Progression"]) + cost_death <- one_off_cost$cost[one_off_cost$Event.to.apply == "Death"] + + cost_matrix <- matrix(data = 0, + nrow = nrow(markov_trace), + ncol = ncol(markov_trace)) + trt_line <- 0 + th <- nrow(markov_trace) + + for (molecule in seq[1:length(seq)-1]) { + trt_line <- trt_line + 1 + costs <- cost_by_cycle[[which( + lapply(cost_by_cycle, + function(x) x$mol == molecule) == TRUE)]] + + #add in AE costs + if (i$dd_apply_AE_options!="one-off") {costs$on_trt <- costs$on_trt + AE_costs[molecule+1,"cost"]} + + if (trt_line == 1) { + cost_matrix[,1] <- markov_trace[,1] * costs$on_trt + cost_matrix[,2] <- markov_trace[,2] * costs$off_trt + } else { + + # On treatment + #add in MRU costs for starting a new line of treatment + costs$on_trt[1] <- costs$on_trt[1] + MRU_cost_at_tx_start + + startcol <- (3 + (trt_line - 2) * 2 * th) + endcol <- startcol + th - 1 + # note the t() in the line below - apply[x,1,fun] returns a column vector + # even though input is a row so needs transposing + cost_matrix[,startcol:endcol] <- + t(apply(markov_trace[,startcol:endcol], 1, function(x) x * costs$on_trt)) + + # Off treatment + startcol <- endcol+1 + endcol <- startcol + th - 1 + # note the t() in the line below - apply[x,1,fun] returns a column vector + # even though input is a row so needs transposing + cost_matrix[,startcol:endcol] <- + t(apply(markov_trace[,startcol:endcol], 1, function(x) x * costs$off_trt)) + } + } + + # Add in additional cost of starting BSC + molecule <- seq[length(seq)] + if (molecule != 999) stop("Last molecule in sequence is not BSC! Full sequence: ", seq) + + startcol <- endcol + 1 + cost_matrix[,startcol] <- markov_trace[,startcol] * MRU_cost_at_tx_start + + #cost of death + propn_entering_by_cycle <- markov_trace[,ncol(markov_trace)] - + shift(markov_trace[,ncol(markov_trace)], n=1, fill=0) + + cost_matrix[,ncol(cost_matrix)] <- propn_entering_by_cycle * cost_death + + + cost_matrix +} + + diff --git a/3_Functions/costs_and_QALYs/subsequent_tx_weighted_averages.R b/3_Functions/costs_and_QALYs/subsequent_tx_weighted_averages.R new file mode 100644 index 0000000..2a24d30 --- /dev/null +++ b/3_Functions/costs_and_QALYs/subsequent_tx_weighted_averages.R @@ -0,0 +1,56 @@ +# code to calculate weighted averages of subsequent therapies + +#' an early attempt at computing model averaging for an overall population (see `p$basic$lookup$pop_map`). +#' Not used as averaging was incorporated into results processing in `Model_Structure.R` +#' +f_calculate_weighted_average_of_subsequent_treatments <- function(proportions = i$R_table_sub_txts_prop_n_costs, + mol_lookup = i$r_pld_lookup_mol, + popn_lookup = i$r_overall_lookup_pop) { + + #temp line at present to read in a res object to process + res <- readRDS("D:/Downloads/markov_model_results_bc_PHNMA.rds") + + proportions <- proportions[,c(1:5,11)] + mol_lookup <- rbind(mol_lookup, c("BSC","BSC",999)) + results <- list() + + for (i in 1:6) { + seq_popn <- popn_lookup$Sequencing.population.number[i] + + results[[i]] <- list(population = i, + seq_popn = seq_popn, + proportions = proportions[proportions$Population == paste0("pop",seq_popn),]) + + results[[i]]$proportions$trt_n <- paste0(mol_lookup[match(results[[i]]$proportions$Line.1, mol_lookup[,2]),3], "→", + mol_lookup[match(results[[i]]$proportions$Line.2, mol_lookup[,2]),3], "→", + mol_lookup[match(results[[i]]$proportions$Line.3, mol_lookup[,2]),3], "→", + mol_lookup[match(results[[i]]$proportions$Line.4, mol_lookup[,2]),3]) + results[[i]]$proportions$trt_n <- gsub("→NA","",results[[i]]$proportions$trt_n) + + results[[i]]$proportions$costs_undisc <- res$undisc[[i]]$res$costs[match(results[[i]]$proportions$trt_n, res$undisc[[i]]$res$trt_n)] + results[[i]]$proportions$qalys_undisc <- res$undisc[[i]]$res$qalys[match(results[[i]]$proportions$trt_n, res$undisc[[i]]$res$trt_n)] + results[[i]]$proportions$ly_undisc <- res$undisc[[i]]$res$ly[match(results[[i]]$proportions$trt_n, res$undisc[[i]]$res$trt_n)] + + results[[i]]$proportions$costs_disc <- res$disc[[i]]$res$costs[match(results[[i]]$proportions$trt_n, res$disc[[i]]$res$trt_n)] + results[[i]]$proportions$qalys_disc <- res$disc[[i]]$res$qalys[match(results[[i]]$proportions$trt_n, res$disc[[i]]$res$trt_n)] + + results[[i]]$proportions$p.costs_undisc <- results[[i]]$proportions$Adj.proportion.given.line.1 * results[[i]]$proportions$costs_undisc + results[[i]]$proportions$p.qalys_undisc <- results[[i]]$proportions$Adj.proportion.given.line.1 * results[[i]]$proportions$qalys_undisc + results[[i]]$proportions$p.ly_undisc <- results[[i]]$proportions$Adj.proportion.given.line.1 * results[[i]]$proportions$ly_undisc + + results[[i]]$proportions$p.costs_disc <- results[[i]]$proportions$Adj.proportion.given.line.1 * results[[i]]$proportions$costs_disc + results[[i]]$proportions$p.qalys_disc <- results[[i]]$proportions$Adj.proportion.given.line.1 * results[[i]]$proportions$qalys_disc + + results[[i]]$proportions <- as.data.table(results[[i]]$proportions) + + results[[i]]$summaries <- results[[i]]$proportions[,list(costs_undisc = sum(p.costs_undisc), + qalys_undisc = sum(p.qalys_undisc), + ly_undisc = sum(p.ly_undisc), + costs_disc = sum(p.costs_disc), + qalys_disc = sum(p.qalys_disc)), + by = results[[i]]$proportions$Line.1] + } + + return(results) +} + diff --git a/3_Functions/costs_and_QALYs/utility_processing.R b/3_Functions/costs_and_QALYs/utility_processing.R new file mode 100644 index 0000000..7d8c734 --- /dev/null +++ b/3_Functions/costs_and_QALYs/utility_processing.R @@ -0,0 +1,191 @@ +# utilities processing into QALYs + +#' uses named range `R_table_util` from Excel to generate values applied to states +#' in the models. Populates both ST and PS models as these are simple HSUVs. +#' +#' @param raw_utilities named range `R_table_util` from Excel front end file +#' @param PSA Flag for generating samples for probabilistic setting +#' @param samples Number of sample if `PSA==TRUE` +#' +#' @details Simple function to tidy up the table from Excel and produce a format +#' which can then be further processed for the ST or PS models. +#' +#' +f_process_utilities <- function(raw_utilities, + PSA = FALSE, + samples = NULL) { + + if (PSA == TRUE & is.null(samples)) { + stop("Warning: request to return PSA samples for utility data and number of samples not specified in call to f_process_utilities") + } + + utilities <- raw_utilities[,1:9] + colnames(utilities)[6:9] <- gsub("Mean","",colnames(utilities)[6:9]) + + if (PSA == FALSE) { + return(utilities) + } else { + sampled <- list() + # cat("Generating",samples,"PSA utility samples") + + raw_utilities <- data.table(raw_utilities) + + out_tab <- raw_utilities[,list(Population, Population.name, Treatment.line, Molecule, Treatment.name)] + + # beta parameters for all rows, all with identifiers with Mean taken out: + u_beta <- lapply(estBetaParams(raw_utilities[, 6:9], raw_utilities[, 10:13] ^ 2), function(param) { + out <- cbind(out_tab,param) + colnames(out) <- gsub("Mean","",colnames(out)) + out + }) + + # Cycle down the rows of this generating all PSA iterations all in one go and + # producing one big table with iteration id + s <- 1:samples + return(rbindlist(lapply(1:nrow(u_beta$alpha), function(param_row) { + + id <- u_beta$alpha[param_row,list(Population, Population.name, Treatment.line, Molecule, Treatment.name)] + + id <- as.data.table(lapply(id, function(x) rep(x,samples))) + + id$iteration <- s + + # Get data for this HSUV + dat <- list( + alpha = u_beta$alpha[param_row,list(OnTxt, OffTxt, PFS, PD)], + beta = u_beta$beta[param_row,list(OnTxt, OffTxt, PFS, PD)] + ) + + # Generate common random numbers + random_gen <- runif(samples) + + hsuv_table <- as.data.table(lapply(structure(names(dat$alpha),.Names=names(dat$alpha)), function(state) { + qbeta( + random_gen, + shape1 = .subset2(dat$alpha,state), + shape2 = .subset2(dat$beta,state) + ) + })) + + # stick the id table and the HSUV table together to produce the result - samples PSA iterations of all states for this category + return(data.table(id,hsuv_table)) + }))) + } +} + + + +#' NOT USED - an early attempt at applying utiltiies (including AE utilities) +#' to the trace produced by the ST model +#' +f_apply_utilities_to_trace <- function(utilities, + markov_trace, + apply_AE = c("one-off", "per cycle"), + AE_rate, + AE_QALY_penalty, + pop, + seq, + cycle_length, + .p, + age_adjust, + starting_age, + verbose) { + + apply_AE <- match.arg(apply_AE) + age_adjust <- (age_adjust == "Yes") + + QALY_matrix <- matrix(0, nrow = nrow(markov_trace), ncol = ncol(markov_trace)) + colnames(QALY_matrix) <- colnames(markov_trace) + for (tx_line in seq_along(seq)) { + mol <- seq[tx_line] + + if(verbose) cat("Calculating QALY matrix for population", pop, "Line", tx_line, "= molecule", mol,"\n") + + #calculate QALYs accrued per cycle for on and off tx for this line and molecule + QALY_on <- utilities$OnTxt[utilities$Population == pop & + utilities$Treatment.line == tx_line & + utilities$Molecule == mol] * cycle_length + + if(length(QALY_on) != 1) { + stop("multiple or no entries found in R_table_util for population ", pop, + " Line ", tx_line, " Molecule ", mol," on treatment.\nSuggest check excel inputs file, utilities sheet.\n") + } + + #add in AE QALY penalty + if (apply_AE != "one-off") { + if (mol != 999) { + QALY_on <- QALY_on + + AE_QALY_penalty$QALYs[ + AE_QALY_penalty$trt == i$List_comparators[mol+1] + & AE_QALY_penalty$line == tx_line + ] + } + #no AE decrement is applied here either for BSC or when one off-setting is chosen + } + + + # don't calculate off treatment QALYs if treatment is BSC (as there is no 'off tx' for BSC) + if (mol != 999) { + QALY_off <- utilities$OffTx[utilities$Population == pop & + utilities$Treatment.line == tx_line & + utilities$Molecule == mol] * cycle_length + + + if(length(QALY_off) != 1) { + stop("multiple or no entries found in R_table_util for population ", pop, + " Line ", tx_line, " Molecule ", mol," off treatment.\nSuggest check excel inputs file, utilities sheet.\n") + } + } + + if (age_adjust) { + #age adjust QALY_on and QALY_off here - vector of th length + QALY_on <- rep(QALY_on, nrow(markov_trace)) + QALY_on <- adjust_utility(age = starting_age, sex = 0.5, + utilities = data.frame(cycle = 1:nrow(markov_trace), QALY_on = QALY_on), + .p = .p) + + if (mol != 999) { + QALY_off <- rep(QALY_off, nrow(markov_trace)) + QALY_off <- adjust_utility( + age = starting_age, + sex = 0.5, + utilities = data.frame(cycle = 1:nrow(markov_trace), QALY_off = QALY_off), + .p = .p + ) + } + } + + #add to QALYs_matrix + #note this works whether QALY_on is a single value or vector of length nrow(markov_trace) + QALY_matrix[, 1 + (2 * (tx_line - 1))] <- markov_trace[, 1 + (2 * (tx_line - 1))] * QALY_on + if (mol != 999) { + QALY_matrix[, 2 + (2 * (tx_line - 1))] <- markov_trace[, 2 + (2 * (tx_line - 1))] * QALY_off + } + + + if (apply_AE == "one-off") { + if (mol != 999) { + AE_qaly_weight <- AE_QALY_penalty$QALYs[AE_QALY_penalty$trt == i$List_comparators[mol+1] & + AE_QALY_penalty$line == tx_line] + AE_qaly_duration <- mean( + i$R_table_AE_rates$duration_weeks[ + i$R_table_AE_rates$Molecule == mol + & i$R_table_AE_rates$Treatment.line == if (tx_line>2) 2 else tx_line + ] + ) + AE_oneoff_penalty <- AE_qaly_weight * AE_qaly_duration + + + if (tx_line == 1) { + QALY_matrix[1, 1 + (2 * (tx_line - 1))] <- QALY_matrix[1, 1 + (2 * (tx_line - 1))] + AE_oneoff_penalty + } else { + QALY_matrix[, 1 + (2 * (tx_line - 1))] <- c(0,diff(QALY_matrix[, 1 + (2 * (tx_line - 1))])) * (1 + AE_oneoff_penalty) + #### work in progress needs to link to number entering the state + } + } + } + + } + + return (QALY_matrix) +} diff --git a/3_Functions/excel/excel extraction example.xlsx b/3_Functions/excel/excel extraction example.xlsx new file mode 100644 index 0000000..c3d05ae Binary files /dev/null and b/3_Functions/excel/excel extraction example.xlsx differ diff --git a/3_Functions/excel/extract.R b/3_Functions/excel/extract.R new file mode 100644 index 0000000..de2d090 --- /dev/null +++ b/3_Functions/excel/extract.R @@ -0,0 +1,105 @@ +# This script contains the functions required to extract data from excel. +# The primary function f_excel_extract extracts all named ranges from an excel file, +# and puts all of those objects into an object of the same name in a flat list. + +#' Function to automatically extract all named ranges from an excel workbook +#' +#' +#' @param path_to_excel_file the path to the excel file. Ensure that this is normalized for the operating system you are using +#' @param verbose if FALSE (the default), no printing will occur. If TRUE the name and file path of each named range will be printed to trace errors. +#' +f_excel_extract <- function(path_to_excel_file, verbose = FALSE) { + + # require(openxlsx) + + # Note that pre-loading the workbook to an object vastly improves computational + # efficiency, because it's in RAM and not on disk. Reading from disk repeatedly + # is extremely slow and even a small task like this can take time unnecessarily. + # Load workbook, get the named regions from it ready for lapply. + + wb <- loadWorkbook(path_to_excel_file) + sheets <- wb$sheet_names + named <- getNamedRegions(wb) + nams <- unlist(as.list(named)) + names(nams) <- nams + + # Cycle through all the named ranges, making a decision on cleaning as we go + output <- lapply(nams, function(named_range) { + + if (verbose) cat(paste0("Extracting named range ", named_range, " from ", path_to_excel_file, "\n")) + + # It is impossible to presume the inclusion or exclusion of headers without + # further information. This is because if the headers are included in the columns + # the columns become character data, and tables may contain text in rows + # so we can't try to convert to numbers to identify whether the header + # row is erroneously included in the named range. Therefore we assume + # that all tables for R DO include the column names, and these are converted + # to R-safe column names (i.e. replacing special characters with dots) + + dat <- read.xlsx(wb,namedRegion = named_range, check.names = TRUE,colNames = TRUE) + + if (nrow(dat) == 0) { + # if it has 0 rows, then the input must be a 1 cell one, so we can safely assume + # no row names + dat <- read.xlsx(wb,namedRegion = named_range,colNames = FALSE,rowNames = FALSE) + + if (all(length(dat) == 1, names(dat)[1] == "X1")) { + dat <- unlist(dat, use.names = FALSE) + } else if(nrow(dat) == 1) { + dat <- unlist(read.xlsx(wb,namedRegion = named_range,colNames = FALSE,rowNames = FALSE), use.names = F) + } + + return(dat) + } else if(ncol(dat) == 1) { + # if there's 1 column, then it's actually just a vector: + dat <- unlist(read.xlsx(wb,namedRegion = named_range,colNames = FALSE,rowNames = FALSE), use.names = F) + } else { + # logically we can't identify whether a table has appropriate names or not + # at this point. this will have to be left to the cleaning functions. + return(dat) + } + }) + + # Drop the workbook at the end (I think in {} this would happen anyway, but want to ensure!) + rm(wb) + return(output) +} + + + +#' Function to take the table "R_table_param" from the "PATT RCC_model inputs.xlsx workbook +#' and expand it out into a list of lists which can then be expanded during cleaning (for +#' things such as adding in a vcov and such) +#' +#' @param tab tbe named range "R_table_param" from "PATT RCC_model inputs.xlsx" +#' +#' @examples +#' +#' i <- f_excel_extract(path_to_excel_file, verbose = TRUE) +#' f_excel_cleanParams(i$R_table_param) +#' +f_excel_cleanParams <- function(tab, verbose = FALSE) { + require(data.table) + tab <- as.data.table(tab) + params <- tab$Parameter.name + names(params) <- params + + list_for_i <- lapply(params, function(param) { + + if (verbose) cat(paste0("Excel extraction - tidying parameters table: ",param,"\n")) + + # for each param, grab the row as a list so we can add to it with things + # that don't have to be 1x1 + out_list <- as.list(tab[Parameter.name == param,]) + + # there are several columns that should be numeric but aren't due to other + # row values being text + suppressWarnings( + if (!is.na(as.numeric(out_list$Mean))) out_list$Mean <- as.numeric(out_list$Mean) + ) + + return(out_list) + }) + + return(list_for_i) +} diff --git a/3_Functions/markov/markov.R b/3_Functions/markov/markov.R new file mode 100644 index 0000000..61ccad6 --- /dev/null +++ b/3_Functions/markov/markov.R @@ -0,0 +1,952 @@ +# This script defines the functions for extrapolating the treatment line residency +# by line, on/off treatment, BSC and dead. This includes several functions: +# +# - A function specific to this project to take the 2d numeric matrix of transition probabilities and convert it to the list structure required for the M compiler +# - Separates columns out into separate matrices by treatment line +# - Calculates any 1-sum(x) +# - splits the matrices into numeric vectors for efficiency later +# - A function to compile the sparse matrix, M, containing the transitions within and across treatment lines per model cycle, which are contained in matrix TP +# - Input is a NAMED LIST of lists of numeric vectors, and a debug flag which checks probabilities and final trace all sum to 1 at the cost of speed +# - Lines of therapy MUST be called Lx where x is treatment line +# - lines of therapy contain 2 elements: on and off relating to on or off treatment +# - on treatment contains 4 numeric vectors named stay, disc, next, die +# - off treatment contains 3 numeric vectors named stay, next, die +# - Summing the nth element from these vectors should always sum to 1 and this is checked if debug==TRUE +# - BSC MUST be called "bsc" +# - bsc only contains stay and dead probabilities +# - Expects each line object to contain 3 or 4 numeric vectors called "stay", "disc", "next", "die" +# - A function to compute a Markov trace from the initial population vector p (summing to 1) as efficiently as possible +# - The top 2 rows of M change each model cycle (transitions for first-line) according to TP +# - the rest of M is static +# - Extrapolation will therefore use block matrix multiplications for efficiency. Before extrapolating: +# - break off M into M1 and M2 where M1 is the first 2 rows of M and M2 is the rest +# - Place first set of values for M1 from TP$L1 into M1 +# - Extrapolation then simplifies to this each model cycle: +# - p_tplus1 = (p_t[1:2] %*% M1) + (p_t[3:N] %*% M2) +# - both of these produce a 1xN vector where N is the number of columns in M +# - vectorised sum is near-instant in R, so this is efficient and reduces the size of the matrix multiplication + + + +# Data manipulation function ---------------------------------------------- + +# This function manipulates the 2d numeric matrix TP into the list format needed +# for the M compiler + +requireNamespace("data.table") +requireNamespace("tidyverse") +requireNamespace("collapse") +requireNamespace("Matrix") +requireNamespace("openxlsx") + + + +#' Function to prepare a 2d numeric matrix containing transition probabilities +#' describing a treatment sequencing model for compilation into the large sparse matrix +#' required to extrapolate such a model's treatment line residency. +#' +#' NO LONGER USED IN THE MODEL. Legacy code during formation of this strategy. +#' Includes unit testing so remains in the repository as could be used in future. +#' +#' Note that the naming of columns is strict, and the function makes the following assumptions: +#' - There is a column for treatment cycle called "c" in the first column +#' - There MUST be a prefix Ln where n is treatment line +#' - The TPs MUST be in the following order for each treatment line: +#' - Discontinuation probability per cycle +#' - Next treatment probability per cycle +#' - Death probability per cycle +#' - The last line of treatment must have a next line probability (i.e. to go onto BSC or no treatment) +#' - There MUST be a column for death probability whilst on no treatment +#' +#' @param tp 2d numeric matrix containing cycle number, transition probabilities. must follow rules and ordering! +#' @param n_lines the number of treatment lines NOT INCLUDING NO TREATMENT AT THE END +#' @param debug double check TPs sum to one (they always do, but more features could be added in future) +#' +#' +#' @examples +#' # using the toy example in the NICE PATT pathways project, produce TP vectors per line and state +#' TP <- f_markov_M_prep( +#' tp = read.xlsx("./1_Data/TP matrix expansion toy example.xlsm",sheet = "Sheet3"), +#' n_lines = 4, +#' debug = FALSE +#' ) +#' +f_markov_M_prep <- function(tp,n_lines, debug = FALSE) { + + # Convert to data.table for efficiency, as.list and good subsetting later: + + if (!all(c("data.table", "data.frame") %in% class(tp))) { + tp <- as.data.table(tp) + } + + # Calculate target number of columns in data and check it's correct, error if not. + target_cols <- 1 + (n_lines * 3) + 1 + if(dim(tp)[2] != target_cols) stop(paste0( + "The transition probability matrix should have ", target_cols, + " columns, but instead it has ", dim(tp)[2], ". Each treatment line must have ", + " a column for discontinuation, next line, death, column 1 must be cycle number ", + " and the last column must be death probability per cycle whilst on no treatment" + )) + + # Rename the columns to standardize. NOTE that this assumes the correct ordering. + # We can't possibly know ex ante what these columns will be called, so we have to do this. + colnames(tp) <- c("c", unlist(lapply(1:n_lines, function(x){paste0("L",x,c("_disc", "_next", "_death"))})),"NT_death") + + # next step - separate these out into our list: + + line_names <- c(paste0("L",1:n_lines),"NT") + names(line_names) <- line_names + + # Break the TPs up into separate matrices per treatment line, adding in the required + # columns: + line_list <- lapply(line_names, function(treatment_line) { + which_nams <- which(substr(colnames(tp),1,2)== treatment_line) + tp_this_line <- as.list(tp[,which_nams,with=FALSE]) + + # add up the other elements in a vectorised way to produce the probability of leaving + # and then p(stay) = 1-p(leave) + tp_this_line[[paste0(treatment_line,"_stay")]] <- 1-Reduce(`+`,tp_this_line) + + # Finally, since we've already separated them via list depth, we can drop the + # prefixes from their names: + names(tp_this_line) <- str_replace(names(tp_this_line),paste0(treatment_line,"_"),"") + + return(tp_this_line) + }) + + # Check all probabilities sum to 1 at all times + if (debug) { + cat("Checking that TPs always sum to 1 for every line:\n") + unlist(lapply(lapply(line_list, function(tline) Reduce(`+`,tline)) , function(trt_line) all(trt_line == 1))) + } + + # so now we have a nicely organised list of transition probabilities that we can + # efficiently pull out ([] subsetting is slower than .subset2() and $ within lists) + + return(line_list) + +} + + +# N calculator ------------------------------------------------------------ + +#' Function to calculate the value of N for the sequencing modelling function. +#' Includes columns for first-line therapy, then tunnel columns for all subsequent lines +#' plus one for dead patients. +#' +#' @param n_lines number of treatment lines NOT INCLUDING no treatment at the end +#' @param TH time horizon of the model in cycles +#' +f_markov_calcN <- function(n_lines, TH) { + + # Breadkown of N: + # + # - 2 for first line on and off treatment + # - TH*2 for each subsequent treatment which is not BSC/NT + # - TH for BSC/NT + # - 1 for dead + + return(2 + (2 * TH * (n_lines-1)) + TH + 1) + +} + + + +# Topleft finder ---------------------------------------------------------- + +#' Function to locate the top left cell in the matrix M to enter values diagonally +#' from that point for each treatment line. For example 2L on treatment tunnel +#' starts from 3 3 and goes south easterly for time horizon cells. 2L off treatment +#' tunnel starts from 3 TH+3 and goes south easterly etc +#' +#' @param tun_n The number of tunnels +#' @param TH the time horizon in cycles, including cycle 0 +#' @param nt the "no tunnel" states before the tunnels start. Usually 2 for 1L on treatment, 1L off treatment. +#' +f_markov_topleftFinder <- function(tun_n, TH, nt) { + + # Calculate the indices + tun_r_on <- sum(nt , ((2*TH) * (tun_n-1)), 1) + tun_c_on_stay <- sum(nt + 1, ((2*TH) * (tun_n-1)), 1) + tun_c_on_stop <- tun_c_on_stay + TH + tun_c_on_next <- tun_c_on_stop + TH - 1 + tun_r_off <- tun_c_on_stop - 1 + tun_c_off_stay <- tun_c_on_stop + tun_c_off_next <- tun_c_on_next + + # Put them in a matrix that can be used for co-ordinates in f_vec_in_mat_diag + outMat <- matrix( + c( + tun_r_on , tun_c_on_stay, + tun_r_on , tun_c_on_stop, + tun_r_on , tun_c_on_next, + tun_r_off, tun_c_off_stay, + tun_r_off, tun_c_off_next + ), + nrow = 5, + byrow = TRUE, + dimnames = list( + c( + paste0(paste0("L",tun_n + 1,"_"),c("stay","disc","next"),"_on"), + paste0(paste0("L",tun_n + 1,"_"),c("stay","next"),"_off") + ), + c("row", "col") + ) + ) + return(outMat) +} + + + +# M compiler -------------------------------------------------------------- + +#' Function to compile a transition probability matrix defining a treatment sequence, +#' where patients move from one treatment to the next, or straight to death, including +#' on and off treatment tunnels. To be used as "block matrices" with a variable and static component. +#' +#' NOTE, this has been superseded by `f_markov_M_compiler_TPupdate` which functions exactly the same +#' except for taking tp to be pre-processed rather than the result of `f_markov_M_prep`. To exapand, +#' In the patient flow `f_pf_computePF_mk` function, the function `f_pf_mk_ComputeTPs` is now +#' used to compute the TPs in the final form required by `M`. This changed some code inside +#' this function (simply what to do with the values of `TPs` and how to manipulate them). +#' +#' WARNING: This function assumes the cycle number is 1 (i.e. M_1 is the result). This is because you should not +#' be replacing the TPM M including the whole tunnel each cycle, Instead +#' use this function just once at baseline to create the structure, and then +#' replace the required values for transitions relating to first-line therapy +#' +#' @param tp table of TPs. MUST have 1st column cycle, then disc, next, death sequentially for each line, plus no treatment death (e.g. t L1_disc L1_next L1_death L2_disc...NT_death) +#' @param n_lines Number of treatment lines total, including first line and BSC (for 4 tunnels this is 6 with BSC) +#' @param TH time horizon in model cycles (the number of rows in the original tp before using `f_markov_M_prep` on it) +#' @param N The required dimensions of M - use `f_markov_calcN` to calculate it +#' @param nt non-tunnel rows - almost always 2, but if for example second line is not a tunnel (e.g. relapse and remission model) then it would be 4 and tunnels would start at 3L ONLY 2 has been tested +#' +#' +f_markov_M_compiler <- function(tp, n_lines, TH, N, nt = 2) { + + require(data.table) + require(collapse) + require(Matrix) + + # Assume cycle = 1, which it should be every time this function is used! + c <- 1 + + # Now we have probability to stay in state as well as probability of + # discontinuation, next line, death, the rest is putting these columns + # or derivitives of them into M + + cat("Generating matrix M\n") + # Part 2: define M (WARNING this uses a lot of RAM) + + M <- Matrix(data = 0,nrow = N, ncol = N,sparse = TRUE) + M[N,N] <- 1 # dead is dead, you can't be come undead. + + cat("Finding diagonal start points\n") + # Part 3: compute top left elements + tl_elements <- do.call( + rbind, + lapply(1:(n_lines-1), function(i_tun) { + f_markov_topleftFinder( + tun_n = i_tun, + TH = TH, + nt = 2 + ) + }) + ) + + cat("Adding in BSC/NT tunnel M\n") + # Add in BSC. patients coming out of the last line of therapy go in here if they're not dead, + # so it's the same row, next column as the end of the lad + tl_elements <- rbind( + tl_elements, + matrix( + data = c(max(tl_elements[, "col"]), max(tl_elements[, "col"]) + 1), + nrow = 1, + dimnames = list("BSC_stay", c("row", "col")) + ) + ) + + # Part 4: populate the matrix: + # + # Right, now we have all the data, all the starting points, and the matrix + # to put it all into. We can proceed to populate. Let's start with first line + # as it's different from the others. + cyc_prob <- lapply(.subset2(tp,"L1"),function(x) x[1]) + vals_1L <- matrix( + c( + 1, 1, .subset2(cyc_prob,"stay"), + 1, 2, .subset2(cyc_prob,"disc"), + 1, 3, .subset2(cyc_prob,"next"), + 1, N, .subset2(cyc_prob,"death"), + 2, 2, .subset2(cyc_prob,"stay") + .subset2(cyc_prob,"disc"), + 2, 3, .subset2(cyc_prob,"next"), + 2, N, .subset2(cyc_prob,"death") + ), + nrow = 7, + byrow = TRUE + ) + M[vals_1L[,1:2]] <- vals_1L[,3] + + + # Note that we made all of our replacements in one go by making a matrix + # of matrix co-ordinates and values. If we do the same for all of our values + # we can enter all of the data needed in one fell swoop. We're therefore going + # to take the same approach as above, but on a bigger scale! + # + # To do that, we're going to do what we did in the definition of our function + # f_vec_in_mat_diag - make an index, and then compile our co-ordinate matrix + # using one row of tl_elements at a time along with the paired data from tp. + # + # The index will always be 0 to TH-1, and will simply be added to the start + # row and start column to identify where to put the data. + + indx <- 0:(TH - 1) + + co_ordinate_matrix <- lapply(1:(n_lines-1), function(line_n) { + # Identify the starting points for this line: + tl_tun <- f_markov_topleftFinder( + tun_n = line_n, + TH = TH, + nt = nt + ) + + # Make an id for this line which is used to put the right numbers in the right place + tline <- paste0("L",line_n) + + # Pull out the columns to put as the data to enter in M + line_tp <- .subset2(tp,tline) + p_stay <- .subset2(line_tp, "stay") + p_disc <- .subset2(line_tp, "disc") + p_next <- .subset2(line_tp, "next") + p_death <- .subset2(line_tp, "death") + + # make co-ordinate values and value values for the on and off treatment tunnels including + # death for this treatment line. Note that this assumes equivalency for on/off treatment + # states for death probability and next probability. Off treatment stay needs recalculating + # as there's no discontinue, so it's 1-p_death-p_next + co_list <- list( + # on-treatment tunnel: + co_on_stay = matrix(c(tl_tun[1,"row"] + indx, tl_tun[1,"col"] + indx,p_stay) ,ncol=3), + co_on_disc = matrix(c(tl_tun[2,"row"] + indx, tl_tun[2,"col"] + indx,p_disc) ,ncol=3), + co_on_next = matrix(c(tl_tun[3,"row"] + indx, tl_tun[3,"col"] + indx,p_next) ,ncol=3), + co_on_death = matrix(c(tl_tun[1,"row"] + indx, rep(N,TH) ,p_death),ncol=3), + + # Off treatment tunnel for this line: + co_off_stay = matrix(c(tl_tun[4,"row"] + indx, tl_tun[4,"col"] + indx,1 - p_death - p_next),ncol=3), + co_off_next = matrix(c(tl_tun[5,"row"] + indx, tl_tun[5,"col"] + indx,p_next ),ncol=3), + co_off_death = matrix(c(tl_tun[4,"row"] + indx, rep(N,TH) ,p_death ),ncol=3) + ) + + # Bind it together into one larger matrix and return that + return(do.call(rbind,co_list)) + }) + + # co_ordinate_matrix now has all of the data organised except for the BSC/NT tunnel. + # We add that here: + + bsc_start_point <- f_markov_topleftFinder( + tun_n = n_lines - 1, + TH = TH, + nt = nt + )[5,"col"] + co_bsc <- rbind( + matrix(c(bsc_start_point + indx, bsc_start_point + 1 + indx,tp$NT$stay) ,ncol=3), + matrix(c(bsc_start_point + indx, rep(N,TH) ,tp$NT$death),ncol=3) + ) + + # bind it all together into one massive matrix of co-ordinates and values: + co_ordinate_matrix <- rbind(do.call(rbind,co_ordinate_matrix),co_bsc) + + cat("Inserting values\n") + # Put all values into the matrix in one vectorised command: + M[co_ordinate_matrix[,1:2]] <- co_ordinate_matrix[,3] + + # The last cycle of BSC is an issue, stay probability isn't entered. + M[N-1,N-1] <- 1-M[N-1,N] + + # Well all rows sum to 1 so that's promising: + # all(round(rowSums(M),10) == 1) # reads true: + return(M) + +} + +# Markov trace calculator ------------------------------------------------- + +# Simple function to take the results of f_markov_M_compiler and use them to +# extrapolate the population using a block-matrix approach. Requires the first-line +# transitions from TP (i.e. TP$L1 after applying f_markov_M_prep) +# +# We try to do this as efficiently as possible as it's a big number crunch, hence +# the sparse matrix approach, and splitting to blocks: + + +#' Function to extrapolate a treatment sequence using a block sparse matrix multiplication +#' method. THIS FUNCTION IS NOT USED AND HAS BEEN SUPERSEDED BY `f_markov_M_extrapolator` +#' +#' @param L1_tp transition probabilities for first-line treatment, resulting from `f_markov_M_prep`. will change within M each model cycle (hence block matrix approach) +#' @param TH time horizon in model cycles. Not inferred so that a shorter time horizon can be entered if desired. +#' @param M TP matrix compiled using `f_markov_M_compiler` +#' @param N dimension of M, calculate either as `dim(M)[1]`, `dim(M)[2]`, or use `f_markov_calcN` +#' @param p (optional) initial population vector. Usually sparse matrix 1xN with value of 1 in 1,1. Assumed so if not entered, but does allow mixed lines at baseline if desired. +#' +#' @details superseded by `f_markov_M_extrapolator`. `f_markov_M_extrapolator` +#' does the same job as f_markov_sequencExtrapolator, but expects the +#' `L1_tp` argument to be the result of `f_pf_mk_ComputeTPs`, rather +#' than computing those TPs "in-house". This function is tested via +#' unit test so remains. +#' +f_markov_sequencExtrapolator <- function(L1_tp,TH,M,N,p=NULL) { + + require(Matrix) + require(utils) + + # If the user doesn't provide p, make it and put everyone in first-line therapy + # at baseline (90+% of cases this is true, though sometimes reasonably people choose BSC/NT + # from baseline, or skip into subsequent treatment lines) + if (is.null(p)) { + p <- Matrix(0,nrow = 1,ncol = N,sparse = TRUE) + p[1,1] <- 1 + } + + # Reduce is quite efficient here, I imagine it's hard to beat efficiency wise. + # Reduce is a kind of advanced for loop which lets you iterate over an entire list + # each round in a safe environment. This is useful here as without committing things + # to .GlobalEnv repeatedly (slow) we can iterate over p extrapolating the population + # in every health state in a very efficient way: + + # Remember that every row of M after the 2nd one is STATIC, it never changes! + + M1 <- M[1:2,] + M2 <- M[3:N,] + + cat(paste0("Extrapolating a model with ", N, " health states for ",TH," model cycles, give me a minute...\n")) + + pb <- txtProgressBar( + min = 1, + max = TH-1, + initial = 1, + width = 50, + style = 3, + char = "=" + ) + + trace_separate <- Reduce( + x = 1:(TH-1), + init = p, + accumulate = TRUE, + f = function(prev, t) { + + setTxtProgressBar(pb, t) + + # get the 1L probabilities, put them in a co-ordinate matrix and replace + # those elements of M. Note that t starts from 1 but model cycles start from 0. + # Therefore the first element of this output is p_1 (not p_0). p_0 can be added + # at the end as we already know it, so why calculate it again :) + + cyc_prob <- lapply(L1_tp,\(x) x[t]) + vals_1L <- matrix( + c( + 1, 1, .subset2(cyc_prob,"stay"), + 1, 2, .subset2(cyc_prob,"disc"), + 1, 3, .subset2(cyc_prob,"next"), + 1, N, .subset2(cyc_prob,"death"), + 2, 2, .subset2(cyc_prob,"stay") + .subset2(cyc_prob,"disc"), + 2, 3, .subset2(cyc_prob,"next"), + 2, N, .subset2(cyc_prob,"death") + ), + nrow = 7, + byrow = TRUE + ) + M1[vals_1L[,1:2]] <- vals_1L[,3] + + # Now that M has been updated, we can apply the matrix multiplication as + # a block matrix multiplication by doing the variable and static parts separately + # and adding them up: + + (prev[1:2] %*% M1) + (prev[3:N] %*% M2) + + } + ) + + cat(paste0("\nDone! Sticking the rows together to give you your trace:\n")) + + # Stick them together efficiently by defining a numeric matrix (much faster than do.call(rbind,x)). + # This is the result. Note that all rows sum to 1 to at least 12 decimal places :) + # all(round(rowSums(trace),12)==1) + # + # Make this sparse as well as there's loads of zeros in there + # + return(Matrix( + matrix( + data = unlist(lapply(trace_separate,as.numeric)), + nrow = TH, + ncol = N, + byrow = TRUE + ), + sparse = TRUE + )) + +} + + +# Adding up selected columns ---------------------------------------------- + +#' Function to "consolidate" the expanded trace (whcih may have 18k+ columns!) into +#' the descrete health states that HEOR typically use to comprehend a cost-effectiveness +#' model setting. +#' +#' @param full_trace This is `M`. the fully expanded block-diagonal sparse matrix +#' @param L1_onoff_adjustment First-line on and off treatment adjustment. `TPs$L1$death_on + TPs$L1$next_on` where `f_pf_mk_ComputeTPs` was used to get `TPs` +#' @param split_list a list which can be used to add up columns before and after a time point (e.g. 10 cycles in that state) +#' @param TH Time horizon in cycles accounting for cycle 0 +#' @param n_lines nubmer of treatment lines +#' @param discFacQ discount FACTOR for QALYs (i.e. discount per cycle going from time t to TH) +#' @param discFacC discount FACTOR for costs (i.e. discount per cycle going from time t to TH) +#' +#' +#' @details We can't show all the health states as there are many thousands. It +#' uses too much data and is not informative. therefore consolidated +#' is more useful because we can observe patient flow through the +#' treatment lines. +#' +#' For example for a model with `TH=2080` and 4 lines there are `f_markov_calcN(4,2080)=14563` +#' health states. This is too many to plot or use informatively to +#' characterize a decision problem. +#' +#' Instead we add them up by discrete health state so that we can +#' generate plots and compute all non-cost elements of the model +#' (which do not currently require per time in state level calculations) +#' +f_markov_traceConsolidator <- + function(full_trace, + L1_onoff_adjustment, + split_list = NULL, + TH, + n_lines, + discFacQ, + discFacC) { + + + # Validation first: + if(n_lines > 1) { + target_names <- c( + "L1_on", + "L1_off", + unlist(lapply(paste0("L",2:n_lines),function(lam) { + paste0(lam,c("_on","_off")) + })), + "BSC" + ) + } else { + target_names <- c( + "L1_on", + "L1_off", + "BSC" + ) + } + + if (!is.null(split_list)) { + if(!all(names(split_list) %in% target_names)) stop("The names in your split_list don't match what they should be!") + } + + N <- ncol(full_trace) + + # Good, so we have everything we need in split list. + # Now we work out some co-ordinates. it's simply a series of multiplications of TH + + # Figure out which columns of the trace correspond to what treatment lines: + col_starts <- c(1:3,(TH * 1:((N-3)/TH))+3) + names(col_starts) <- c(target_names,"dead") + + # Calculate entrants entrants for all lines 2+. + L1_on <- full_trace[,1] + L1_exit <- L1_on - shift(L1_on,type = "lead",fill = 0) + L1_off_entrants <- L1_exit - (L1_on*L1_onoff_adjustment) + + entrants <- full_trace[,col_starts[3:length(col_starts)]] + entrants[,ncol(entrants)] <- c(0,diff(entrants[,ncol(entrants)])) + colnames(entrants) <- c(target_names[3:length(target_names)],"dead") + + entrants <- cbind(L1_off = L1_off_entrants,entrants) + + + line_labs <- names(col_starts) + names(line_labs) <- line_labs + + # We can generate some useful stuff right from the off. for example, OS & first-line-PFS (s for survival): + + s <- list() + + # Overall and 1L population + s$OS <- rowSums(full_trace[,1:(N-1)]) + s$L1 <- rowSums(full_trace[,1:2]) + + # total line populations over time by treatment status (i.e. with no splits): + s$full_lines <- lapply(1:(length(col_starts) - 1), function(living_state) { + + col_index <- col_starts[living_state]:(col_starts[living_state+1]-1) + + if (length(col_index) == 1) { + full_trace[,col_index] + } else { + rowSums(full_trace[,col_index]) + } + }) + s$full_lines[[length(s$full_lines) + 1]] <- full_trace[,N] + s$full_lines <- matrix(unlist(s$full_lines),ncol = length(line_labs)) + colnames(s$full_lines) <- line_labs + + s$entrants <- entrants + s$col_starts <- col_starts + + # discFacC <- diag(discFacC) + # discFacQ <- diag(discFacQ) + + # Discounted versions: + s$disc <- list( + C = list( + full_lines = discFacC * s$full_lines, + entrants = discFacC * s$entrants + ), + Q = list( + full_lines = discFacQ * s$full_lines, + entrants = discFacQ * s$entrants + ) + ) + + # split list is optional + if (!is.null(split_list)) { + # cycle through the line labs to help us select the correct columns. Note, no point + # in splitting the dead population so we're not doing it. + s$split_pop <- lapply(3:(length(line_labs) - 1), function(iline) { + tline <- line_labs[iline] + split_settings <- split_list[[tline]] + + if (split_settings$any_split == FALSE) { + # not splitting this line, don't return anything as we've already calculated + # it in full_lines + return(NULL) + } else { + # We're going to split this population by before and after the multiple + # split times we've been given: + + col_index <- col_starts[iline]:(col_starts[iline+1]-1) + + split_pops <- do.call( + cbind, + lapply(split_settings$t, function(t_split) { + matrix( + data = c( + rowSums(full_trace[,col_index[1:(t_split-1)]]), + rowSums(full_trace[,col_index[t_split:length(col_index)]]) + ), + ncol = 2, + byrow = FALSE, + dimnames = list( + NULL, + c(paste0(tline,"_split",t_split,"_before"),paste0(tline,"_split",t_split,"_after")) + ) + ) + }) + ) + } + }) + names(s$split_pop) <- line_labs[3:(length(line_labs) - 1)] + } + + return(s) +} + + + +# Updated M related functions --------------------------------------------- + +#' Second version of f_markov_M_compiler. Functionality is the same as f_markov_M_compiler, +#' but the expected values of `tp` are different (the result of function `f_pf_mk_ComputeTPs` instead of `f_markov_M_prep`) +#' +#' @param tp the result of `f_pf_mk_ComputeTPs` to compute the TPs from the extrapolated survival +#' @param n_lines the number of treatment lines +#' @param nt Number of non-tunnel states before the tunnels start (default is 2 for 1L_on and 1L_off) +#' @param verbose additional console output on progress of compiling `M` +#' +#' +#' @details See function `f_markov_M_compiler`. Generates a block-diagonal sparse +#' matrix, with diagonal elements starting from entry to the first tunnel +#' state and ending at death. Matrix-multiplication of a vector of population +#' against this `M` computes tunnel states because the only ways out of +#' the tunnels are off treatment, death, next line for on-treatment states +#' and death and next line for off-treatment states. +#' +#' this approach also allows different complex vectors of costs and so on +#' to be applied given time in state, such that those going to a line +#' will first get the initiation cost of that line then follow the dosing +#' schedule and any stopping rules, titration, stopping rules within. +#' +#' This `M` approach allows a great deal of flexibility in a markov sequencing +#' setting, which would not be possible in Excel. +#' +f_markov_M_compiler_TPupdate <- function(tp, n_lines, TH, nt = 2, verbose = FALSE) { + + # Compute the size of the required matrix M + N <- f_markov_calcN(n_lines = n_lines, TH = TH) + + if (verbose) cat(paste0("Generating matrix M (",N,"x",N,")\n")) + if (verbose) cat(paste0("state count: 2 for 1L on/off, ", TH*2, " for each active line (",(TH*2)*(n_lines-1),"), ", TH, " for BSC, and 1 for dead\n")) + # Part 2: define M + + M <- Matrix(data = 0,nrow = N, ncol = N,sparse = TRUE) + M[N,N] <- 1 # dead is dead, you can't be come undead. + + if (verbose) cat("Finding diagonal start points\n") + # Part 3: compute top left elements + if (n_lines > 1) { + line_vec <- 1:(n_lines-1) + tl_elements <- do.call( + rbind, + lapply(line_vec, function(i_tun) { + f_markov_topleftFinder( + tun_n = i_tun, + TH = TH, + nt = 2 + ) + }) + ) + } + + if (verbose) cat("Adding in BSC/NT tunnel M\n") + # Add in BSC. patients coming out of the last line of therapy go in here if they're not dead, + # so it's the same row, next column as the end of the lad + + if (n_lines == 1) { + tl_elements <- f_markov_topleftFinder( + tun_n = 1, + TH = TH, + nt = 2 + )[1,] + } else { + tl_elements <- rbind( + tl_elements, + matrix( + data = c(max(tl_elements[, "col"]), max(tl_elements[, "col"]) + 1), + nrow = 1, + dimnames = list("BSC_stay", c("row", "col")) + ) + ) + } + + + # Part 4: populate the matrix: + # + # Right, now we have all the data, all the starting points, and the matrix + # to put it all into. We can proceed to populate. Let's start with first line + # as it's different from the others. + # Assume cycle = 1, which it should be every time this function is used! + cyc_prob <- lapply(.subset2(tp,"L1"),function(x) x[1]) + vals_1L <- matrix( + c( + 1, 1, .subset2(cyc_prob,"stay_on"), + 1, 2, .subset2(cyc_prob,"disc_on"), + 1, 3, .subset2(cyc_prob,"next_on"), + 1, N, .subset2(cyc_prob,"death_on"), + 2, 2, .subset2(cyc_prob,"stay_off"), + 2, 3, .subset2(cyc_prob,"next_off"), + 2, N, .subset2(cyc_prob,"death_off") + ), + nrow = 7, + byrow = TRUE + ) + M[vals_1L[,1:2]] <- vals_1L[,3] + + + # Note that we made all of our replacements in one go by making a matrix + # of matrix co-ordinates and values. If we do the same for all of our values + # we can enter all of the data needed in one fell swoop. We're therefore going + # to take the same approach as above, but on a bigger scale! + # + # To do that, we're going to do what we did in the definition of our function + # f_vec_in_mat_diag - make an index, and then compile our co-ordinate matrix + # using one row of tl_elements at a time along with the paired data from tp. + # + # The index will always be 0 to TH-1, and will simply be added to the start + # row and start column to identify where to put the data. + + indx <- 0:(TH - 1) + + if (n_lines > 1) { + # this is for 2L+, 1L does not have a tunnel, it's just the first 2 columns (on and off trt) + co_ordinate_matrix <- lapply(1:(n_lines-1), function(line_n) { + + # Identify the starting points for this line: + tl_tun <- f_markov_topleftFinder( + tun_n = line_n, + TH = TH, + nt = nt + ) + + # Make an id for this line which is used to put the right numbers in the right place + tline <- paste0("L",line_n+1) + + # Pull out the columns to put as the data to enter in M + line_tp <- .subset2(tp,line_n+1) + + # make co-ordinate values and value values for the on and off treatment tunnels including + # death for this treatment line. Note that this assumes equivalency for on/off treatment + # states for death probability and next probability. Off treatment stay needs recalculating + # as there's no discontinue, so it's 1-p_death-p_next + co_list <- list( + # on-treatment tunnel: + co_on_stay = matrix(c(tl_tun[1,"row"] + indx[-TH], tl_tun[1,"col"] + indx[-TH],.subset2(line_tp, "stay_on")[-TH]) ,ncol=3), + co_on_disc = matrix(c(tl_tun[2,"row"] + indx[-TH], tl_tun[2,"col"] + indx[-TH],.subset2(line_tp, "disc_on")[-TH]) ,ncol=3), + co_on_next = matrix(c(tl_tun[3,"row"] + indx , rep(tl_tun[3,"col"], TH) ,.subset2(line_tp, "next_on") ) ,ncol=3), + co_on_death = matrix(c(tl_tun[1,"row"] + indx , rep(N,TH) ,.subset2(line_tp, "death_on") ),ncol=3), + + # Off treatment tunnel for this line: + co_off_stay = matrix(c(tl_tun[4,"row"] + indx[-TH], tl_tun[4,"col"] + indx[-TH],.subset2(line_tp, "stay_off")[-TH]),ncol=3), + co_off_next = matrix(c(tl_tun[5,"row"] + indx , rep(tl_tun[5,"col"], TH) ,.subset2(line_tp, "next_off") ),ncol=3), + co_off_death = matrix(c(tl_tun[4,"row"] + indx , rep(N,TH) ,.subset2(line_tp, "death_off")),ncol=3) + ) + + # Bind it together into one larger matrix and return that + return(do.call(rbind,co_list)) + }) + } + + + # co_ordinate_matrix now has all of the data organised except for the BSC/NT tunnel. + # We add that here. Note that the start point is still correct if n_lines is 1 + # so we're looking at "tunnel 0" + + bsc_start_point <- f_markov_topleftFinder( + tun_n = n_lines - 1, + TH = TH, + nt = nt + )[5,"col"] + + + co_bsc <- rbind( + matrix(c(bsc_start_point + indx, bsc_start_point + 1 + indx,.subset2(tp$NT,"stay_on")) ,ncol=3), + matrix(c(bsc_start_point + indx, rep(N,TH) ,.subset2(tp$NT,"death_on")),ncol=3) + ) + + # bind it all together into one massive matrix of co-ordinates and values: + if (n_lines > 1) { + co_ordinate_matrix <- rbind(do.call(rbind,co_ordinate_matrix),co_bsc) + } else { + co_ordinate_matrix <- co_bsc + } + + + if (verbose) cat("Inserting values\n") + # Put all values into the matrix in one vectorised command: + M[co_ordinate_matrix[,1:2]] <- co_ordinate_matrix[,3] + + # The last cycle of BSC is an issue, stay probability isn't entered. + M[N-1,N-1] <- 1-M[N-1,N] + + # Well all rows sum to 1 so that's promising: + # all(round(rowSums(M),10) == 1) # reads true: + return(M) + +} + +#' Updated version of the sequence extrapolator `f_markov_sequencExtrapolator` which +#' takes results from `f_pf_mk_ComputeTPs` to populate `L1_tp`. Otherwise the same +#' as `f_markov_sequencExtrapolator`. +#' +#' @param L1_tp transition probabilities for first-line treatment, resulting from `f_pf_mk_ComputeTPs`. will change within M each model cycle (hence block matrix approach) +#' @param TH time horizon in model cycles. Not inferred so that a shorter time horizon can be entered if desired. +#' @param M TP matrix compiled using `f_markov_M_compiler` +#' @param N dimension of M, calculate either as `dim(M)[1]`, `dim(M)[2]`, or use `f_markov_calcN` +#' @param p (optional) initial population vector. Usually sparse matrix 1xN with value of 1 in 1,1. Assumed so if not entered, but does allow mixed lines at baseline if desired. +#' +#' @details See `f_markov_sequencExtrapolator` +#' +f_markov_M_extrapolator <- function(L1_tp,TH,M,N,p=NULL,prog=FALSE,verbose=FALSE) { + + # If the user doesn't provide p, make it and put everyone in first-line therapy + # at baseline (90+% of cases this is true, though sometimes reasonably people choose BSC/NT + # from baseline, or skip into subsequent treatment lines) + if (is.null(p)) { + p <- Matrix(0,nrow = 1,ncol = N,sparse = FALSE) + p[1,1] <- 1 + } + + # Reduce is quite efficient here, I imagine it's hard to beat efficiency wise. + # Reduce is a kind of advanced for loop which lets you iterate over an entire list + # each round in a safe environment. This is useful here as without committing things + # to .GlobalEnv repeatedly (slow) we can iterate over p extrapolating the population + # in every health state in a very efficient way: + + # Remember that every row of M after the 2nd one is STATIC, it never changes! + + M1 <- M[1:2,] + M2 <- M[3:N,] + + if (verbose) cat(paste0("Extrapolating a model with ", N, " health states for ",TH," model cycles, give me a minute...\n")) + + if (prog) { + pb <- txtProgressBar( + min = 1, + max = TH-1, + initial = 1, + width = 50, + style = 3, + char = "=" + ) + } + + tp1 <- as.list(t(as.data.table(L1_tp))) + + + # Update, make all the top slices of M for all cycles before extrapolating + m1_list <- lapply(1:length(L1_tp$disc_on), function(model_cycle) { + coo <- matrix( + c(1,1,.subset2(.subset2(L1_tp, "stay_on"), model_cycle), + 1,2,.subset2(.subset2(L1_tp, "disc_on"), model_cycle), + 1,3,.subset2(.subset2(L1_tp, "next_on"), model_cycle), + 1,N,.subset2(.subset2(L1_tp, "death_on"), model_cycle), + 2,2,.subset2(.subset2(L1_tp, "stay_off"), model_cycle), + 2,3,.subset2(.subset2(L1_tp, "next_off"), model_cycle), + 2,N,.subset2(.subset2(L1_tp, "death_off"), model_cycle) + ), + nrow = 7, + byrow = TRUE + ) + M1[coo[,1:2]] <- coo[,3] + return(M1) + }) + + trace_separate <- Reduce( + x = 1:(TH-1), + init = p, + accumulate = TRUE, + f = function(prev, t) { + if(prog) setTxtProgressBar(pb, t) + (prev[1:2] %*% .subset2(m1_list,t)) + (prev[3:N] %*% M2) + } + ) + + # free up RAM + rm(m1_list) + rm(M2) + rm(M) + + if (verbose) cat(paste0("\nDone! Sticking the rows together to give you your trace:\n")) + + # Stick them together efficiently by defining a numeric matrix (much faster than do.call(rbind,x)). + # This is the result. Note that all rows sum to 1 to at least 12 decimal places :) + # all(round(rowSums(trace),12)==1) + # + # Make this sparse as well as there's loads of zeros in there + # + return(Matrix( + unlist(lapply(trace_separate, function(x) x@x)), + nrow = TH, + byrow = TRUE, + sparse = TRUE + )) + +} + + + diff --git a/3_Functions/misc/cleaning.R b/3_Functions/misc/cleaning.R new file mode 100644 index 0000000..8c79a20 --- /dev/null +++ b/3_Functions/misc/cleaning.R @@ -0,0 +1,66 @@ +# Script dedicated to functions which clean up inputs from excel for entry +# into p (or to be used to generate e.g. probabilsitic draws, bounds and so on). + + + +# Patient characteristics table ------------------------------------------- + + +#' Function to clean the patient characteristic table up into a neat list for +#' entry into p. +#' +#' @param R_table_ptchar named range "R_table_ptchar" from excel inputs +#' @param lookups i$lookup as generated in the model_structure script. +#' +f_cleaning_ptchar <- function(R_table_ptchar, lookups) { + tab <- data.table(i$R_table_ptchar) + + # Translate population into numbers: + tab$Population <- lookups$ipd$pop$Number[match(tab$Population,lookups$ipd$pop$Description)] + + u_pop <- structure(unique(tab$Population),.Names = paste0("pop_",unique(tab$Population))) + u_line <- structure(unique(tab$Treatment.line),.Names = paste0("line_",unique(tab$Treatment.line))) + + # produce a list by population and line which presents cleaned inputs for + # each input. This can then go into p for a deterministic case or be used to + # generate probabilistic draws. + lapply(u_pop, function(popu) { + lapply(u_line, function(line) { + ta <- tab[Population == popu & Treatment.line == line,] + if (nrow(ta) == 0) { + ta <- tab[Population == 0 & Treatment.line == line,] + } + + # Make a tidy list: + list( + age = list( + mean = ta$Starting.age..years..Mean, + se = ta$Starting.age..years..SE, + n = ta$Starting.age..years....10.n + ), + pr_fem = list( + mean = ta$Starting...female.Mean, + se = ta$Starting...female...10.SE, + n = ta$Starting...female...10.n + ), + weight = list( + mean = ta$Body.weight..kg...10.Mean, + se = ta$Body.weight..kg...10.SE, + n = ta$Body.weight..kg...10.n + ), + prior_io = list( + mean = ta$Prior.IO...in.12.months..10.Mean, + se = ta$Prior.IO...in.12.months..10.SE, + se = ta$Prior.IO...in.12.months..10.n + ), + pr_i_rsk = list( + mean = ta$Starting...PorI.risk.Mean, + se = ta$Starting...PorI.risk.SE, + n = ta$Starting...PorI.risk...10.n + ) + ) + + }) + }) +} + diff --git a/3_Functions/misc/discounting.R b/3_Functions/misc/discounting.R new file mode 100644 index 0000000..13f0928 --- /dev/null +++ b/3_Functions/misc/discounting.R @@ -0,0 +1,34 @@ +#' Simple function to compute a discount factor given time vector in years from +#' `t=0` to time horizon, the discount rate, and a flag for method. +#' +#' @param r discount rate (annual) +#' @param t_yr vector of time in years of length time horizon +#' @method flag for method +#' +#' +f_discFac <- function(r, t_yr, method = "classic cycle time") { + stopifnot(method %in% c("classic floor","classic cycle time","continuous")) + if(method == "classic floor") { + # classic increment discounting using floor of year + return(1 / ((1 + r) ^ floor(t_yr))) + } else if (method == "classic cycle time") { + return(1 / ((1 + r) ^ t_yr)) + } else if (method == "continuous") { + # Continuous discounting using e^rt + 1/exp(r * t_yr) + } +} + +#' Time step converter using exponential assumption (Box 3.1, Briggs et al.) +#' Get the ratio of target time step to origin time step, Convert to r +#' in SAME time unit (i.e. / 1), THEN convert to P in target time unit +#' +#' @param cl_orig original cycle length +#' @param cl_target target cycle length +#' @param P probability in one original cycle length +#' +f_misc_tConvertExp <- function(cl_orig, cl_target, P) { + t <- cl_target / cl_orig + r <- -log(1-P) + return(1-exp(-r*t)) +} diff --git a/3_Functions/misc/fpnma_fns.R b/3_Functions/misc/fpnma_fns.R new file mode 100644 index 0000000..58ac2e6 --- /dev/null +++ b/3_Functions/misc/fpnma_fns.R @@ -0,0 +1,189 @@ +# code to manipulate FPNMA inputs, generate HRs by week from FPNMA coefficients etc + +#' generate HR(t) from coefficients of a fractional polynomial network meta analysis (FPNMA) +#' +#' @param TH time horizon in cycles. Note that in CE models time usually starts from 0. +#' @param wks_per_month this should be `i$i_mon_to_weeks` from the Excel model. defaults to 4.348214 +#' +#' @details `lnHR = intercept + s1*time^(power1) + s2*time^(power2), where power=0 means ln(time)` is +#' the specification of the model. rearranging that for HR gives the return in the function. +#' +#' NOTE THAT THIS ASSUMES THE TIME UNIT OF THE ANALYSIS IS MONTHS AND THE +#' COST EFFECTIVENESS MODEL IS IN WEEKS! +#' +#' +f_HRs_by_cycle <- function(TH, wks_per_month = 4.348214, exponents, coeffs) { + exponents <- as.numeric(unlist(exponents)) + coeffs <- as.numeric(unlist(coeffs)) + t <- 0:TH + t_month <- t / wks_per_month + return(exp(coeffs[1] + coeffs[2] * t_month ^ exponents[1] + coeffs[3] * t_month ^ exponents[2])) +} + + + +#' Function to tidy up the input data for the FPNMA specifically for the PATT RCC model +#' +#' @details this function should not be used anywhere else as it is highly specific +#' to this decision problem, including implicit assumptions! +#' +f_FPNMA_tidy_and_add_exponents <- function(PSAcoefficients, exponents) { + + exponents$Population <- exponents$Endpoint <- NA + exponents$Endpoint[exponents$Outcome == "OS"] <- 0 + exponents$Endpoint[exponents$Outcome == "PFS"] <- 1 + exponents$Population[exponents$Risk.group == "All"] <- 0 + exponents$Population[exponents$Risk.group == "Intermediate/poor"] <- 1 + exponents$Line[exponents$Line == "1L"] <- 1 + exponents$Line[exponents$Line == "2L+"] <- 2 + exponents$Line <- as.numeric(exponents$Line) + + colnames(PSAcoefficients)[colnames(PSAcoefficients) == "molecule"] <- "Molecule" + colnames(PSAcoefficients)[colnames(PSAcoefficients) == "risk"] <- "Population" + colnames(PSAcoefficients)[colnames(PSAcoefficients) == "endpoint"] <- "Endpoint" + colnames(PSAcoefficients)[colnames(PSAcoefficients) == "line"] <- "Line" + colnames(PSAcoefficients)[colnames(PSAcoefficients) == "referencetreatment"] <- "Reference.treatment" + colnames(PSAcoefficients)[colnames(PSAcoefficients) == "referencetrial"] <- "Reference.trial" + + PSAcoefficients <- merge(PSAcoefficients, exponents, all.x = TRUE) + PSAcoefficients <- PSAcoefficients[order(PSAcoefficients$run, + PSAcoefficients$Population, + PSAcoefficients$Line, + PSAcoefficients$Endpoint),] + data.table(PSAcoefficients) +} + +#' Function to generate FPNMA coda from coefficients. Highly specific to PATT RCC model. +f_generate_FPNMA_coda <- function(coeffs, TH, wks_per_month) { + rbindlist(lapply(1:nrow(coeffs), function(row_in_table) { + # Get data for this row: + id <- as.data.table(lapply(coeffs[row_in_table,list(Population,Line,Molecule,Endpoint,Reference.treatment,Reference.trial)], function(x) rep(x,TH+1))) + id$time <- 0:TH + id$HR <- f_HRs_by_cycle( + TH = TH, + wks_per_month = wks_per_month, + exponents = coeffs[row_in_table, list(Exponent.1, Exponent.2)], + coeffs = coeffs[row_in_table, list(intd, s1, s2)] + ) + # correct for non-finite HRs. These are usually in time 0 where the HR is 1 + # in all contexts as no events are possible yet. + id$HR[!is.finite(id$HR)] <- 1 + return(id) + })) +} + + +#' Quick function for FPNMA: rebase the FPNMA HRs to use cabo in second line +#' +#' @param FPNMAdata FPNMA data. Deterministic means or extrapolated "PSA" versions (one of the posterior draws / CODA sample) +#' +#' @details rebases the hazard ratios to be in reference to molecule 8 not molecule +#' 10 in second-line. This is done for practical reasons (the reference curve available +#' and used in the base-case is molecule 8 for 2nd line therapy) +#' +f_rebase_for_cabo_as_ref_in_2L <- function(FPNMAdata) { + + # Rebasing to allow use of cabo as reference treatment in 2nd line + + FPNMArebasecabo <- FPNMAdata[Line==2,] + EvevscaboHROS <- FPNMArebasecabo[Molecule == 8 & Endpoint == 0] + EvevscaboHRPFS <- FPNMArebasecabo[Molecule == 8 & Endpoint == 1] + FPNMArebasecabo[Endpoint == 0]$HR <- FPNMArebasecabo[Endpoint == 0]$HR / EvevscaboHROS$HR + FPNMArebasecabo[Endpoint == 1]$HR <- FPNMArebasecabo[Endpoint == 1]$HR / EvevscaboHRPFS$HR + FPNMArebasecabo$Reference.treatment <- 8 + + EveOSHRs <- + cbind( + time = seq(2, 2080), + Molecule = rep(10, 2079), + Reference.treatment = rep(8, 2079), + Line = rep(2, 2079), + Endpoint = rep(0, 2079), + Population = rep(0, 2079), + Reference.trial = rep(1, 2079), + HR = EvevscaboHROS$HR + ) + EvePFSHRs <- + cbind( + time = seq(2, 2080), + Molecule = rep(10, 2079), + Reference.treatment = rep(8, 2079), + Line = rep(2, 2079), + Endpoint = rep(1, 2079), + Population = rep(0, 2079), + Reference.trial = rep(1, 2079), + HR = EvevscaboHRPFS$HR + ) + + FPNMArebasecabo <- rbind(FPNMArebasecabo, EveOSHRs, EvePFSHRs) + + FPNMAdata <- rbind(FPNMAdata, FPNMArebasecabo) + + FPNMAdata +} + +#' Quick function for FPNMA: assume that HRs for 3L are the same as for 2L +#' +#' @param FPNMAdata FPNMA data. Deterministic means or extrapolated "PSA" versions (one of the posterior draws / CODA sample) +#' +#' @details Simply applys HRs for 2L to 3L so that the relative efficacy network +#' reflects this assumption duing compilation and propagation. +#' +f_3L_rel_effect_same_as_2L <- function(FPNMAdata) { + + assume3L <- FPNMAdata[Line==2,] + assume3L$Line <- 3 + FPNMAdata <- rbind(FPNMAdata,assume3L) + + assumeTTD <- FPNMAdata[Endpoint==1,] + assumeTTD$Endpoint <- 2 + FPNMAdata <- rbind(FPNMAdata,assumeTTD) + + assumeTTP <- FPNMAdata[Endpoint==1,] + assumeTTP$Endpoint <- 3 + FPNMAdata <- rbind(FPNMAdata,assumeTTP) + + FPNMAdata +} + + +#' generate "destinations" object, which directs the later functions in terms of +#' where to place the FPNMA hazard ratios. +#' +#' @details this function simply takes the first row for each grouping and then +#' drops the hazard ratio part, returning just information on "destinations". These +#' "destinations" then are fed into later functions like `f_NMA_linkFPNMA` and the +#' information therein is used to place the extrapolated hazard ratios from the +#' FPNMA to the right population, line, molecule, trial, endpoint (PLMTE) location. +#' The terms "origin" and "destination" are used instead of "intervention" and +#' "reference" because the reference curves can be at multiple degrees of separation away +#' from their ultimate destinations (e.g. curve --> FPNMA HRs --> assumed same as --> apply HR to). +#' +#' IMPORTANT NOTE: note that a copy of the destinations for reference.trial of 2 +#' (real-world evidence) is made here. This is an assumption of the specific +#' adaptation for the RCC PATT model, which may not hold in other circumstances. +#' Consequently, this function is specific to the RCC model and not the generic +#' pathway +#' +#' +f_gen_destinations <- function(fp_data){ + + destinations <- fp_data[, (head(.SD, 1)), by = list(Population,Line,Molecule,Endpoint,Reference.treatment,Reference.trial)] + destinations <- destinations[,list(Population, Line, Molecule, Endpoint, Reference.treatment, Reference.trial)] + + destinations_temp <- destinations + destinations_temp$Reference.trial <- 2 + destinations <- rbind(destinations,destinations_temp) + + return(destinations) +} + + +#' Function to add a copy of the FPNMA data which sets the reference.trial to 2, +#' which then assumes that the RWE HRs are the same as the trial-based +f_add_reference_trial_2 <- function(fp_data){ + means_temp <- fp_data + means_temp$Reference.trial <- 2 + output <- rbind(fp_data,means_temp) + output +} diff --git a/3_Functions/misc/matrix.R b/3_Functions/misc/matrix.R new file mode 100644 index 0000000..715f04d --- /dev/null +++ b/3_Functions/misc/matrix.R @@ -0,0 +1,297 @@ +# Extremely useful functions for matrix operations + + + +# diagonal vector assignment ---------------------------------------------- + +# Function to allow diagonal insertion of a vector into a matrix in an arbitrary +# direction and position. Extremely useful for the tunnel state stuff central +# to this disease model. + +# Courtesy of https://stackoverflow.com/questions/28746023/how-to-insert-values-from-a-vector-diagonally-into-a-matrix-in-r +# there is in fact a way to stick elements diagonally into an existing matrix: + + +#'Function to insert a vector diagonally into a matrix at an arbitrary location. +#'Assumes south easterly direction of insertion unless option is FALSE +#' +#'@param A A matrix - doesn't have to be square, but the bounds cannot be breached by b +#'@param b A vector - obviously mustn't go outside of the matrix when populating +#'@param start_row the row to "start" from: for nw and ne direction, it will start from `start_row + length(b) - 1` +#'@param start_col the column to "start" from: for sw and nw direction, it will start from `start_col + length(b) - 1` +#' +#'@examples +#' +#'M <- matrix(nrow=10,ncol=10) +#' +#'f_vec_in_mat_diag( +#' A = M, +#' b = runif(5), +#' start_row = 3, +#' start_col = 4, +#' direction = "se" +#') +#' +#' +#'f_vec_in_mat_diag( +#' A = M, +#' b = runif(5), +#' start_row = 3, +#' start_col = 4, +#' direction = "ne" +#') +#' +#' +f_vec_in_mat_diag <- function(A, b, start_row = 1, start_col = 1, direction = "se") { + if(direction == "se") { + # Starting from top left moving south east (default) + indx <- 0:(length(b) - 1) + ind_mat <- matrix(c(start_row + indx, start_col + indx),ncol=2) + } else if (direction == "nw") { + # starting from bottom right moving north west + indx <- (length(b) - 1):0 + ind_mat <- matrix(c(start_row + indx, start_col + indx),ncol=2) + } else if (direction == "sw") { + # starting from top right going south west + indxr <- 0:(length(b) - 1) + indxc <- (length(b) - 1):0 + ind_mat <- matrix(c(start_row + indxr, start_col + indxc),ncol=2) + stop("south west (sw) and north west (nw) are not supported...yet!") + } else if (direction == "ne") { + # starting from bottom left going north east + indxr <- (length(b) - 1):0 + indxc <- 0:(length(b) - 1) + ind_mat <- matrix(c(start_row + indxr, start_col + indxc),ncol=2) + } else { + stop('direction must be one of "se" (default), "nw", "sw", or "ne"') + } + # put the values in the matrix using the co-ordinate matrix & return the result + A[ind_mat] <- b + return(A) +} + + + +# So, we now have a Markov trace for our treatment sequence. +# We want to apply the cost vector that we have to diagonals +# starting from 1,1 to TH,TH, then 2,1 to TH,TH-1 then 3,1 to TH,TH-2 +# and so on with that same vector each time, but 1 shorter! + +# The MOST efficient way to populate a matrix in R is to define a co-ordinate +# matrix, which populates a bunch of elements all at once. This is a matrix +# of 3 columns, row, column and value. + +# As we know what our values are each time, and we also know our rows and columns, +# we can define one of these. To make it easier, I define some functions which +# make the value column + + +# Only used within this script +f_mat_diagDownCM <- function(TH,vals) { + + # V is for values - this is the matrix which is going to get + # populated in the end + V <- Matrix(data = 0,nrow = TH, ncol = TH,sparse = TRUE) + V[TH,TH] <- 1 + + # Make the co-ordinate matrix by first defining the 3 columns, then + # collapsing them into a matrix: + one_to_TH <- 1:TH + row_numbers <- Reduce( + x = 1:(TH-1), + init = one_to_TH, + accumulate = TRUE, + f = function(prev, cyc) { + (one_to_TH + cyc)[1:(TH-cyc)] + } + ) + + # To see what this looks like, look at this: + # row_numbers[(length(row_numbers)-3):length(row_numbers)] + # + # This shows the last 4 runs, with the first element being the cycle that someone + # enters the state in. Collapsing this with unlist() gives us our rows column: + row_numbers <- unlist(row_numbers,use.names = FALSE) + + + # column number + col_numbers <- Reduce( + x = 1:(TH-1), + init = one_to_TH, + accumulate = TRUE, + f = function(prev, cyc) { + 1:(TH-cyc) + } + ) + + # To see this: + # col_numbers[(length(col_numbers)-3):length(col_numbers)] + # + # It moves down into the bottom left such that the people coming into this line + # in cycle TH-1 are in the TH-1 th row in the 1st column: + col_numbers <- unlist(col_numbers,use.names = FALSE) + + # So the co-ordinates for the last 10 elements to put in our matrix V + # are: + # cbind(tail(row_numbers,10),tail(col_numbers,10)) + + # Now all we need to do is the same thing again for the values: + vals_to_enter <- Reduce( + x = 1:(TH-1), + init = vals, + accumulate = TRUE, + f = function(prev, cyc) { + vals[1:(TH-cyc)] + } + ) + vals_to_enter <- unlist(vals_to_enter,use.names = FALSE) + + # vals_to_enter[(length(vals_to_enter)-3):length(vals_to_enter)] + # cbind(tail(row_numbers,10),tail(col_numbers,10),tail(vals_to_enter,10)) + + # So now we have all 3 of our co-ordinates!: + + co_ord <- matrix( + c(row_numbers,col_numbers, vals_to_enter), + ncol = 3 + ) + + V[co_ord[,1:2]] <- co_ord[,3] + + return(V) + +} + +if (FALSE) { + # Function to populate a matrix which should be element-wise multiplied by a vertical slice (columns between two points) of TRACE + library(Matrix) + + TH <- 2089 + + # example dose schedule, dose increases over time to 100, patients are dosed every other week throughout. + dose_over_time <- c(c(10,0),c(20,0),c(35,0),rep(c(50,0),2),rep(c(100,0),ceiling((TH-10)/2)))[1:TH] + + # Apply stopping rule to dose schedule + stopping_rule <- 104 + dose_over_time[stopping_rule:TH] <- 0 + + # Apply RDI + rdi <- 0.8 + dose_over_time <- dose_over_time * rdi + + # Not going to do wastage in this example, just cost per mg for simplicity: + cost_per_mg <- 5 + + # Cost schedule including dose changes over time and stopping rule. + cost_over_time <- dose_over_time * cost_per_mg + + # This is what drug cost in this hypothetical looks like over time, thus correctly allowing people to discontinue treatment between doses. + plot(cost_over_time[1:(52*3)],type="l") + + # Load in some fitted survival data from the model (you have this rds file in your branch so it should just run) + st_list <- readRDS("./1_Data/st_list_for_dawn.rds") + + # Get our compiling and extrapolating functions + source(file.path("3_Functions/markov/markov.R")) + source(file.path("3_Functions/patient_flow/markov.R")) + source(file.path("./3_Functions/markov/TEMP M compiler.R")) + + + # Work out transition probabilities + TPs <- f_pf_mk_ComputeTPs(st_list) + + # Calculate the trace - note that n_lines does NOT include BSC (it's active treatment lines), take 1 of the length of st_list. This fixes empty columns issue I think!: + M <- f_markov_M_compiler_TPupdate( + tp = TPs, + n_lines = length(st_list)-1, + TH = TH, + nt = 2, + verbose = TRUE + ) + + TRACE <- f_markov_M_extrapolator( + L1_tp = TPs$L1, + TH = TH, + M = M, + N = f_markov_calcN(n_lines = length(st_list)-1, TH = TH), + verbose = TRUE, + prog = TRUE + ) + + + + cost_matrix <- f_mat_diagDownCM(TH,cost_over_time) + + # Let's say that we're going to apply these costs to 2L on treatment: + line_2_starts_where <- 3 + line_2_cols <- line_2_starts_where:(TH+line_2_starts_where-1) + + line_2_on_pop <- TRACE[,line_2_cols] + + # Note one can't get into 2L on until cycle 3 start, so + cost_per_cycle <- line_2_on_pop[3:TH,] * cost_matrix[1:(TH-2),] + + # Results: + plot(rowSums(cost_per_cycle), type="l") + sum(cost_per_cycle) + + # So for pre-104 week population: + rowSums(cost_per_cycle[,1:104]) + + # Rest should be 0 + rowSums(cost_per_cycle[,105:TH]) + + + # Now, using the consolidator: + split_list <- lapply(1:(((length(st_list))*2)-1), function(line_n) { + list(any_split = FALSE, t = NULL) + }) + names(split_list) <- c( + unlist(lapply(1:(length(st_list)-1), function(line_n) { + c(paste0("L",line_n,"_on"),paste0("L",line_n,"_off")) + })), + "BSC" + ) + + # Add in the details for our hypothetical treatment: + # + # - Dose loading lasts for 10 cycles + # - Stopping rule at 104 cycles + # + split_list$L2_on$any_split <- TRUE + split_list$L2_on$t <- c(10,104) + + # Get the consolidated trace: + consolidated_trace <- f_markov_traceConsolidator( + full_trace = TRACE, + split_list = split_list, + TH = TH, + n_lines = length(st_list)-1 + ) + + # Just to check our populations are right: + sum(rowSums(line_2_on_pop) - consolidated_trace$full_lines[,"L2_on"]) + + # Combinate 1 has dose loading of 10 cycles, full dose up to cycle 104 then 0 cost after: + cost_during_loading <- consolidated_trace$split_pop$L2_on[,"L2_on_split10_before"] * mean(cost_over_time[1:10]) + full_dose_cost <- (consolidated_trace$split_pop$L2_on[,c("L2_on_split104_before")] - consolidated_trace$split_pop$L2_on[,c("L2_on_split10_before")]) * mean(cost_over_time[11:104]) + + # Add them up: + combinate_1_cost <- cost_during_loading + full_dose_cost + + # Compare: + sum(combinate_1_cost) + sum(cost_per_cycle) + + # Very similar, but not the same. Wonder if it's a reasonable approximation? + # + # + # % error: + ((sum(combinate_1_cost)-sum(cost_per_cycle))/sum(cost_per_cycle))*100 + + # 0.823% difference between doing it the simple way and fully multiplying out. + # + # Is it worth the massive increase in computations? + + +} diff --git a/3_Functions/misc/nesting.R b/3_Functions/misc/nesting.R new file mode 100644 index 0000000..5eebdbf --- /dev/null +++ b/3_Functions/misc/nesting.R @@ -0,0 +1,107 @@ + + +#' Function f_NestedApply: a function to impose the nesting structure required +#' for the model. This function makes sure that +#' within the structure, an arbitrary set of code can +#' be automatically applied to it without the need +#' to repeatedly type out all of the nesting. This +#' keeps all of the bits of data (which may differ in +#' terms of shape, size, class, themselves being nested, +#' a list or even a regression object) separate in their +#' own named spaces, whilst also allowing the convenience +#' of universally applying a function (one could feasibly +#' add conditional logic into the arbitrary code here +#' to allow additional flexibility) +#' +#'@param mylist a list which strictly follows the set list structure. if it differs this will error! +#'@param f any R function, OR a series of lines of code wrapped in "{}" just like a lapply +#' +f_NestedApply <- function(mylist,f) { + lapply(mylist, function(this_pop) { + lapply(this_pop, function(this_line) { + lapply(this_line, function(this_mol) { + lapply(this_mol, function(this_trial) { + lapply(this_trial, function(this_endpoint) { + f(this_endpoint) + }) + }) + }) + }) + }) +} + + +# list_surv_survivaldata in the model is defined early and uses this structure. +# to do something like see what the unique values are in certain columns +# of this dataset: + + +# # Testing that each of he identifier columns for the dataset matchess the place +# # it is in the list: +# f_NestedApply(list_surv_survivaldata, f = function(obj) { +# unlist(sapply(obj,unique)[c("population", "line", "molecule", "trial", "endpoint")]) +# }) +# +# # maximum follow up: +# f_NestedApply(list_surv_survivaldata, f = function(obj) max(obj$timew)) +# +# # Earliest recorded non-censor event +# f_NestedApply(list_surv_survivaldata, f = function(obj) { +# c( +# event_censor_0 = min(obj[event_censor == 0,]$timew), +# event_censor_1 = min(obj[event_censor == 1,]$timew) +# ) +# }) +# +# # Basic survival analysis +# all_TSD14 <- f_NestedApply(list_surv_survivaldata, f = function(obj) { +# if (nrow(obj) > 0) { +# # Do survival analysis if the data has any rows! +# +# lapply (c( +# gengamma = "gengamma", +# exp = "exp", +# weibull = "weibull", +# lnorm = "lnorm", +# gamma = "gamma", +# gompertz = "gompertz", +# llogis = "llogis", +# lognormal = "lognormal" +# ), function(distr) { +# reg <- +# flexsurvreg( +# formula = Surv(timew, event_censor) ~ 1, +# data = obj, +# dist = "gengamma" +# ) +# +# return(list( +# coef = coefficients(reg), +# vcov = vcov(reg), +# fit = c(AIC = AIC(reg), BIC = BIC(reg), ll = logLik(reg)) +# )) +# }) +# } +# }) +# + + +#' Version of `f_NestedApply` with definable levels. Allows applying on PLMTE level +#' +#' @param mylist list following PLMTE nesting format +#' @param f a function to apply like sum, length, head etc. can use `function(x) ...` format if useful +#' @param levels an indicator of levels +#' +function_NestedApply <- function(mylist,f,levels = 5) { + lapply(mylist, function(this_pop) { + if(levels>1) {lapply(this_pop, function(this_line) { + if(levels>2){lapply(this_line, function(this_mol) { + if(levels>3){lapply(this_mol, function(this_trial) { + if(levels>4){lapply(this_trial, function(this_endpoint) { + f(this_endpoint) + })} + })} + })} + })} + }) +} diff --git a/3_Functions/misc/other.R b/3_Functions/misc/other.R new file mode 100644 index 0000000..6e436a7 --- /dev/null +++ b/3_Functions/misc/other.R @@ -0,0 +1,38 @@ + +#' Test the lengths of multiple objects are the same +f_misc_all_same_length = function(...){ + length(unique(lengths(list(...)))) == 1 +} + + +#' Very useful function to extract a PLMTE from a nested list. Used within +#' the relative efficacy and throughout the model. +#' +#' @param dat nested list following the PLMTE format (population line molecule trial endpoint) +#' @param l list containing location information. MUST have pop line mol trial endpoint entries +#' @tr_or allows alternative entry for `l$trial` in case it's needed to look at another trial's PLM&E +#' +#' +f_misc_get_plmte <- function(dat,l, tr_or = NULL) { + list2env(l,envir = environment()) + if (!is.null(tr_or)) { + dat[[pop]][[line]][[mol]][[tr_or]][[endpoint]] + } else { + dat[[pop]][[line]][[mol]][[trial]][[endpoint]] + } +} + + +#' fun function to print to console with some colour +f_misc_colcat <- function(txt,col_num = 32) { + cat( + paste0( + "\033[0;", + col_num, + "m", + txt, + "\033[0m", + "\n" + ) + ) +} \ No newline at end of file diff --git a/3_Functions/misc/plotting.R b/3_Functions/misc/plotting.R new file mode 100644 index 0000000..c419bf2 --- /dev/null +++ b/3_Functions/misc/plotting.R @@ -0,0 +1,77 @@ +#' Function to make a state residency plot - useful for the PS model and checking that +#' states sum to 1. The integral of each line is lifetime expected time in state `:)` +#' +#' @param sr_list a list of s_t per endpoint to be included in the plt +#' @param t_yr time in years per cycle +#' +f_plot_srPlot <- function(sr_list, t_yr, plot_x_lim) { + p_dat <- data.table::rbindlist(lapply(structure(names(sr_list)[1:(length(names(sr_list))-1)],.Names=names(sr_list)[1:(length(names(sr_list))-1)]), function(endpoint) { + data.table( + t_yr = t_yr, + s_t = .subset2(sr_list,endpoint), + endp = endpoint + ) + }))[t_yr <= plot_x_lim,] + ggplot(p_dat, aes(x = t_yr, y = s_t, colour=endp)) + + geom_line() + + theme(legend.position = "bottom") + + theme_classic() + + scale_x_continuous(expand = expansion(mult = c(0,0.05))) + + scale_y_continuous(expand = expansion(mult = c(0,0.05))) +} + + + +# Markov trace plots ------------------------------------------------------ + +#' Function to draw a consolidated trace as a plot +#' +#' @param consol_trace consolidated trace produced during ST model (added up columns) +#' @param treatment_names names to allocate to the legend for each line +#' @tmax time max. Function assumes t is in years with weekly cycle length. +#' +#' +f_plot_mk_draw_consol_trace <- function(consol_trace,treatment_names , tmax = 15) { + plot_dt <- data.table(consol_trace) + th <- nrow(plot_dt) + plot_dt$w <- 0:(th-1) + plot_dt$y <- plot_dt$w / 52.17857 + plot_dt$w <- NULL + plot_dt <- melt.data.table(plot_dt, id.vars = "y") + + trt_seq_plotLab <- c(paste0("L",1:(length(treatment_names)-1)),"NT") + names(trt_seq_plotLab) <- trt_seq_plotLab + + plot_dt <- Reduce( + x = 1:length(treatment_names), + init = plot_dt, + accumulate = FALSE, + f = function(prev, trt_n) { + + if (trt_n < length(treatment_names)) { + which_on <- which(prev$variable == paste0(names(trt_seq_plotLab)[trt_n],"_on")) + which_off <- which(prev$variable == paste0(names(trt_seq_plotLab)[trt_n],"_off")) + prev[which_on,]$variable <- paste0(treatment_names[trt_n]," (on treatment)") + prev[which_off,]$variable <- paste0(treatment_names[trt_n]," (off treatment)") + } else { + # this is BSC + which_ones <- which(prev$variable == "BSC") + prev[which_ones,]$variable <- treatment_names[trt_n] + } + + return(prev) + } + ) + + # We really did it! A fully working trace calculator for a sequencing model in R :) + return(ggplot(plot_dt[y < tmax,],aes(x = y, y = value, colour = variable)) + + geom_line() + + theme_classic() + + theme(legend.position = "bottom") + + scale_x_continuous(expand = expansion(mult = c(0,0.05))) + + scale_y_continuous(labels = scales::percent,expand = expansion(mult = c(0,0.05))) + + labs(x = "Years from baseline", y = "% of baseline cohort") + + guides( + colour = guide_legend(title = NULL) + )) +} diff --git a/3_Functions/misc/qdirichlet.R b/3_Functions/misc/qdirichlet.R new file mode 100644 index 0000000..195075d --- /dev/null +++ b/3_Functions/misc/qdirichlet.R @@ -0,0 +1,50 @@ +# There is no qdirichlet function in R. +# +# Fortunately gtools has a rdirichlet function which is easily adapted. +# +# We simply change the rgamma call to qgamma! + +qdirichlet <- function (alpha, rands) { + + # rands must be of length l*n + + l <- length(alpha) + rl <- length(rands) + + if ((rl/l) %% 1 != 0) stop(paste0("The number of rands (",rl,") is not a multiple of the length of alpha (",l,")")) + + x <- matrix(qgamma(rands, alpha), ncol = l, byrow = TRUE) + sm <- x %*% rep(1, l) + x / as.vector(sm) +} + +if(FALSE) { + # Example: + counts <- c(95,2,1) + ndraw <- 1000 + rands <- runif(length(counts)*ndraw) + + dirich_dr <- qdirichlet( + alpha = counts, + rands = rands + ) + + all(round(rowSums(dirich_dr),12)==1) + + +} + +#' Function to estimate a and b in a beta distribution from mean and variance +estBetaParams <- function(mu, var) { + alpha <- ((1 - mu) / var - 1 / mu) * mu ^ 2 + beta <- alpha * (1 / mu - 1) + return(params = list(alpha = alpha, beta = beta)) +} + +#' Function to estimate `sd(log(x))` from mean, LB and UB +estSDlog <- function(mean, LB, UB) { + LB_Sdlog <- (log(mean) - log(LB)) / 1.96 + UB_Sdlog <- (log(UB) - log(mean)) / 1.96 + sdlog <- mean(LB_Sdlog, UB_Sdlog) + return(sdlog) +} \ No newline at end of file diff --git a/3_Functions/misc/severity_modifier.R b/3_Functions/misc/severity_modifier.R new file mode 100644 index 0000000..0868eff --- /dev/null +++ b/3_Functions/misc/severity_modifier.R @@ -0,0 +1,335 @@ + +# Ensure we have access to necessary commands from elsewhere +if (!("utility_genpop" %in% names(globalenv()))) + source(here::here("3_Functions", "utilities", "age_related.R")) + +if (!("get_lifetables" %in% names(globalenv()))) + source(here::here("3_Functions", "survival", "other_cause_mortality.R")) + +#' Extract the severity modifier parameters from Excel and wrap them up nicely +#' in a list +#' +#' @param .i You can inject `.i`, which is the result of extracting variables +#' from an Excel file. If you don't inject it, the function will look +#' for `i` in the global environment. If it can't find that it will +#' provide suitable default values accompanied by a warning. +#' +#' @param hard_coded If TRUE, use the thresholds from NICE methods guidance circa 2022 +#' +severity_modifier <- function(.i = NULL, hard_coded = FALSE) { + + # If `.i` was not provided then grab `i` from the global environment + if (hard_coded) { + return(list( + breaks_absolute = c(-Inf, 12, 18, Inf), + breaks_proportional = c(-Inf, 0.85, 0.95, Inf), + qaly_weights = c(1, 1.2, 1.7) + )) + } else if (is.null(.i)) { + if ("i" %in% names(globalenv())) { + .i <- get("i", envir = globalenv()) + } else { + warning(paste("`.i` was not provided and `i` is not in the global environment,","hard-coded values are being used")) + return(list( + breaks_absolute = c(-Inf, 12, 18, Inf), + breaks_proportional = c(-Inf, 0.85, 0.95, Inf), + qaly_weights = c(1, 1.2, 1.7) + )) + } + } + + # Different names may be used + param_names <- array( + data = c( + "R_s_num_qalyShort_abs_LB", + "R_s_num_qalyShort_abs_UB", + "R_s_num_qalyShort_prop_LB", + "R_s_num_qalyShort_prop_UB", + "R_s_num_qalyShort_w_LB", + "R_s_num_qalyShort_w_bet", + "R_s_num_qalyShort_w_UB", + + "i_QALY_short_abs_LB", + "i_QALY_short_abs_UB", + "i_QALY_short_prop_LB", + "i_QALY_short_prop_UB", + "i_qaly_weight_LB", + "i_qaly_weight_between", + "i_qaly_weight_UB" + ), + dim = c(7, 2), + dimnames = list( + item = c( + "Abs. shortfall moderate threshold", + "Abs. shortfall extreme threshold", + "Prop. shortfall extreme threshold", + "Prop. shortfall moderate threshold", + "Basic QALY weight", + "Moderate severity weight", + "Extreme severity weight" + ), + naming_system = list("R_s_", "i_") + ) + ) + + # Check we can at least access one per row + variables_present <- apply( + param_names, + 1, + function(row) row[1] %in% names(.i) | row[2] %in% names(.i) + ) + if (!all(variables_present)) { + stop( + "Could not retrieve parameters from `i` or `.i`:\n", + paste("-", dimnames(param_names)[[1]][!variables_present], collapse = "\n") + ) + } + + extract_parameter <- function(item) { + possible_names <- param_names[item,] + values <- lapply(possible_names, function(name) .i[[name]]) + values[[which.max(!sapply(values, is.null))]] + } + + # Grab the values from .i and return in a list + list( + breaks_absolute = c( + -Inf, + extract_parameter("Abs. shortfall moderate threshold"), + extract_parameter("Abs. shortfall extreme threshold"), + Inf + ), + breaks_proportional = c( + -Inf, + extract_parameter("Prop. shortfall moderate threshold"), + extract_parameter("Prop. shortfall extreme threshold"), + Inf + ), + qaly_weights = c( + extract_parameter("Basic QALY weight"), + extract_parameter("Moderate severity weight"), + extract_parameter("Extreme severity weight") + ) + ) +} + +#' Get the appropriate severity modifier according to the discounted QALYs for +#' those with the disease and those without the disease +#' +#' @param qalys_disease The calculated (discounted) QALYs for people with +#' the disease receiving standard care. +#' @param qalys_nodisease The calculated (discounted) QALYs for people who +#' do not have the disease, i.e., applying population +#' utility norms and general population mortality +#' rates. +#' @param .i Allows to inject `i`, otherwise it will be sourced +#' from the global environment. +#' @param .severity_modifier Allows to inject the results of a call to +#' `severity_modifier` instead of it being called +#' every time. +#' @param hard_code_SM hard-code the severity modifier using NICE methods +#' guidance, circa 2022 +#' @param format either `table` or `console`. Table produces a one-row table, console prints results to console +#' +get_severity_modifier <- function(qalys_disease, qalys_nodisease, .i = NULL, .severity_modifier = NULL, hard_code_SM = FALSE, format="table") { + + stopifnot(format %in% c("table", "console")) + if (is.null(.severity_modifier)) + .severity_modifier <- severity_modifier(.i = .i, hard_coded = hard_code_SM) + + abs_shortfall <- qalys_nodisease - qalys_disease + prop_shortfall <- 1 - qalys_disease / qalys_nodisease + + wt_abs <- .severity_modifier$qaly_weights[cut(abs_shortfall, .severity_modifier$breaks_absolute, labels = FALSE)] + wt_prop <- .severity_modifier$qaly_weights[cut(prop_shortfall, .severity_modifier$breaks_proportional, labels = FALSE)] + + if (format == "table") { + return(data.frame( + abs_sf = abs_shortfall, + prop_sf = prop_shortfall, + modifer = max(wt_abs, wt_prop) + )) + } else { + return(max(wt_abs, wt_prop)) + } +} + +#' Calculate the Quality-Adjusted Life Expectancy (QALE) for a person, given +#' their age and sex +#' +#' @param age Age of person +#' @param sex Sex of person +#' @param .i Inject `i` instead of looking in the global environment +#' @param .p Inject `p` instead of looking in the global environment +#' @param .age_step Numerical integration step (choosing a value lower than the +#' default value of 0.2 does not change the results to 5 +#' decimal places; if the function is running slowly then it +#' can be changed to 1 for a 5x speed-up and still be accurate +#' to 3 decimal places) +#' @param .use_time_horizon If this is `TRUE` then the time horizon given in +#' `p$basic$TH_yr` will be used (if this is less than the time +#' till the person reaches their 100th birthday). This is +#' probably not what we want to do, so the default value is +#' `FALSE`... +#' +#' @details +#' In addition to being (somewhat) sensitive to the choice of which life +#' tables are used (e.g., 2017-2019 and 2018-2020 give different results +#' because of the Covid-19 pandemic which affected mortality in 2020) and +#' population utility norms, there are some calculation details which +#' distinguish this from, e.g., the shortfall calculator at +#' https://r4scharr.shinyapps.io/shortfall/ +#' +#' First, we calculate restricted QALE up to the 100th birthday, i.e., no +#' further QALE is accrued after reaching 100th birthday. +#' +#' Second, we discount continuously, so that the discount factor is a smooth +#' (exponential) function of time rather than stepping down each year. +#' +#' Note that the default value for `.age_step` is appropriate for integer +#' `age` but if `age` is non-integer then smaller values of `.age_step` may +#' be necessary for numerical convergence. +baseline_qale <- function(age, sex = c("female", "male"), .i = NULL, .p = NULL, + .age_step = 0.2, .use_time_horizon = FALSE) { + + sex <- match.arg(sex) + + if (is.null(.i)) .i <- get("i", envir = globalenv()) + if (is.null(.p)) .p <- get("p", envir = globalenv()) + + max_age <- if (.use_time_horizon) min(100, age + .p$basic$th_y) else 100 + + ages <- seq(age, max_age, by = .age_step) + + if (max(ages) < max_age) ages <- c(ages, max_age) + steps <- ages[2:length(ages)] - ages[1:(length(ages)-1)] + + utilities <- utility_genpop(ages, sex, .p) + + lifetables <- get_lifetables(.i) + + df <- data.frame( + age = ages[1:(length(ages)-1)], + t0 = ages[1:(length(ages)-1)] - ages[1], + t1 = ages[2:length(ages)] - ages[1] + ) + df$q <- approx(x = lifetables$x, y = lifetables[[paste0("q_", sex)]], xout = df$age, method = "constant")$y + df$m <- -log(1 - df$q) + df$s <- exp(-df$m * (df$t1 - df$t0)) + df$S0 <- c(1, cumprod(df$s[1:(length(df$s)-1)])) + df$r <- .p$basic$discQ + df$u0 <- utilities[1:(length(ages)-1)] + df$u1 <- utilities[2:length(ages)] + + df$auc <- mapply( + auc_step, + t0 = df$t0, + t1 = df$t1, + S0 = df$S0, + u0 = df$u0, + u1 = df$u1, + m = df$m, + MoreArgs = list(r = log(1+.p$basic$discQ)) + ) + + sum(df$auc) +} + + +#' Function to estimate area under a curve assuming exponential line between points +auc_step <- function(t0, t1, S0, u0, u1, m, r) { + # Calculate the area under the curve + # S0 * exp(-m * (t - t0)) * exp(-r * t) * u(t) + # between t = t0 and t = t1, where + # u(t) = a * t + b + # u(t0) = u0 + # u(t1) = u1 + a <- (u1 - u0) / (t1 - t0) + b <- u0 - a * t0 + emrt0 <- exp(-(m+r)*t0) + emrt1 <- exp(-(m+r)*t1) + S0 * exp(m * t0) * ((a/((m+r)^2)) * (emrt0 * (1+(m+r)*t0) - emrt1 * (1+(m+r)*t1)) + (b/(m+r)) * (emrt0 - emrt1) + ) +} + + +#' Calculate the severity modifier (and other outputs) +#' +#' @param age Either a vector of ages (if supplying individual +#' patient data) or the mean age for a cohort. +#' @param sex Either a vector of sexes (which can be a logical +#' vector with TRUE for male and FALSE for female +#' or a character vector with "male" and "female", +#' or even "m" and "f"), or the proportion of the +#' cohort which is male. +#' @param qalys The discounted QALYs for somebody with the disease +#' receiving standard care. +#' @param .patient_level Logical. If TRUE, will treat `age` and `sex` as +#' vectors with individual patient data which are +#' aligned. If FALSE, will assume that `age` gives +#' the mean age, and `sex` gives the proportion of +#' the cohort which are male. If not provided, the +#' function will attempt to infer whether cohort +#' or individual patient data has been provided. +#' @param .i Allows to inject `i`, otherwise it will be sourced +#' from the global environment. +#' @param .p Allows for injection of `p` instead of looking +#' for it in the global environment. +calc_severity_modifier <- function(age, sex, qalys, .patient_level = NULL, .i = NULL, .p = NULL, format = "table") { + + # Check that sex and age are conformable + stopifnot(format %in% c("table", "console")) + stopifnot(length(age) == length(sex)) + + # If .patient_level is not specified, infer it + if (is.null(.patient_level)) .patient_level <- (length(sex) > 1) + + if (.patient_level) { + + # We know data.table is already a dependency, so go ahead and use it... + dt <- data.table::data.table(age = age, sex = sex) + dt[, n := .N, keyby = .(sex, age)] + dt[, qale := mapply(baseline_qale, age, sex, MoreArgs = list(.i = i, .p = p))] + qale <- dt[, sum(n * qale)/sum(n)] + + } else { + qale_f <- baseline_qale(age = age, sex = "female", .i = .i, .p = .p) + qale_m <- baseline_qale(age, "male", .i = .i, .p = .p) + qale <- sex * qale_m + (1 - sex) * qale_f + } + + val <- get_severity_modifier(qalys_disease = qalys, qalys_nodisease = qale, hard_code_SM = TRUE,format = format) + # val <- get_severity_modifier(qalys_disease = qalys, qalys_nodisease = qale, .i = .i) + + if (format == "table") { + data.frame( + qaly_soc = qalys, + qaly_gpop = qale, + abs_sf = val$abs_sf, + prop_sf = val$prop_sf, + modifier = val$modifer + ) + } else { + attr(val, "QALE") <- qale + attr(val, "QALY") <- qalys + class(val) <- "severity_modifier" + + return(val) + } + +} + +format.severity_modifier <- function(val) { + stringr::str_glue( + "{as.vector(val)}\uD7 (", + "QALE: {format(attr(val, 'QALE'), digits = 3, nsmall = 2)}; ", + "shortfall: {format(attr(val, 'QALE')-attr(val, 'QALY'), digits = 3, nsmall = 2)} [absolute], ", + "{format(1 - attr(val, 'QALY') / attr(val, 'QALE'), digits = 3, nsmall = 4)} [proportional]", + ")" + ) +} + +print.severity_modifier <- function(val) { + cat(format.severity_modifier(val), "\n") +} + diff --git a/3_Functions/misc/shift_and_pad.R b/3_Functions/misc/shift_and_pad.R new file mode 100644 index 0000000..35a9161 --- /dev/null +++ b/3_Functions/misc/shift_and_pad.R @@ -0,0 +1,21 @@ +# Shift a vector and pad with zeroes + +# takes a vector of n values and turn into a matrix +# eg x = {1,2,3,4,5} +# output = +# 1 0 0 0 0 +# 2 1 0 0 0 +# 3 2 1 0 0 +# 4 3 2 1 0 +# 5 4 3 2 1 +f_shift_and_pad <- function(x) { + #little trick to allow use of apply function: + #add column number as first row of the matrix + shifted <- matrix(0,nrow = length(x) + 1, ncol = length(x)) + shifted[1,] <- seq(1:ncol(shifted)) + shifted[-1,] <- x + + shifted <- apply(shifted, 2, function(x) shift(x[-1], x[1]-1, fill=0)) + + shifted +} \ No newline at end of file diff --git a/3_Functions/misc/structure.R b/3_Functions/misc/structure.R new file mode 100644 index 0000000..c9c596b --- /dev/null +++ b/3_Functions/misc/structure.R @@ -0,0 +1,82 @@ +#' Function which creates an empty version of the parameters list p and populates +#' it using the inputs from i +#' +#' @param i raw extracted inputs from Excel +#' +f_misc_param_generate_p <- function(i) { + + p <- list( + basic = list( + th = ceiling(i$ui_time_horizon * 365.25 / 7), + th_y = i$ui_time_horizon, + cl_d = i$i_cycle_length_weeks*7, + cl_w = i$i_cycle_length_weeks, + cl_y = i$i_cycle_length_weeks*7/365.25, + discQ = i$ui_disc_qaly, + discC = i$ui_disc_cost, + structure = str_trim(i$dd_model_struct) + ), + demo = list( + table = data.table(i$R_table_ptchar) + ), # patient demographic data + seq = list(), # treatment sequences. same for both model structures. + surv = list(), # Survival extrapolations (if needed for iteration, i.e. in part surv model). same for both model structures + releff = list(), # Relative efficacy network, for use populating the disease model. same for both model structures + costs = list( + mk = list(), + ps = list(), + settings = list( + subsTx = data.table(i$R_table_sub_txts_prop_n_costs) + ) + ), # All drug inputs after they've been processed and tidied up + util = list( + mk = list(), + ps = list() + ), # All inputs to apply to the disease model + ae = list( + mk = list(), + ps = list() + ), # Inputs to generate AE matrices AC and AQ + misc = list( + mk = list(), + ps = list(), + plot = list( + xlim_survplots_yr = 20 + ) + ) + ) + + p$basic$n_cyc <- ceiling(p$basic$th) # rounding up to a whole number + p$basic$t_cyc <- rep(0:p$basic$n_cyc) + p$basic$t_yr <- p$basic$t_cyc*7/365.25 + + # discount factors - base-case edition. These will get replaced for scenarios affecting + # discount rates or the time horizon + p$basic$discFacQ <- f_discFac(p$basic$discQ,p$basic$t_yr) + p$basic$discFacC <- f_discFac(p$basic$discC,p$basic$t_yr) + + return(p) + +} + + +#' function to pull out the HRs being used in the network. Only for time-invariant HRs of course +f_releff_extract_all_HRs <- function(network) { + lapply(network,function(popu) { + lapply(popu, function(li) { + lapply(li, function(mol) { + do.call( + rbind, + lapply(mol, function(tr) { + unlist(lapply(tr, function(endp) { + endp$hr + })) + }) + ) + }) + }) + }) +} + + + diff --git a/3_Functions/patient_flow/ae.R b/3_Functions/patient_flow/ae.R new file mode 100644 index 0000000..054e0a0 --- /dev/null +++ b/3_Functions/patient_flow/ae.R @@ -0,0 +1,5 @@ +# Function is incorporated as code within f_pf_computePF_mk + +f_pf_mk_ae <- function(TRACE,basic,ae) { + +} \ No newline at end of file diff --git a/3_Functions/patient_flow/drug_costs.R b/3_Functions/patient_flow/drug_costs.R new file mode 100644 index 0000000..993b833 --- /dev/null +++ b/3_Functions/patient_flow/drug_costs.R @@ -0,0 +1,13 @@ +# Function is incorporated as code within f_pf_computePF_mk + +# State transition (markov) model ----------------------------------------- + +#' function to compute drug costs for the Markov model using the full expanded trace +#' and drug cost inputs. basic is used for discount factor and time +f_pf_mk_drug_costs <- function(TRACE, basic, drug) { + +} + + + + diff --git a/3_Functions/patient_flow/hcru_costs.R b/3_Functions/patient_flow/hcru_costs.R new file mode 100644 index 0000000..80097c0 --- /dev/null +++ b/3_Functions/patient_flow/hcru_costs.R @@ -0,0 +1,5 @@ +# Function is incorporated as code within f_pf_computePF_mk + +f_pf_mk_hcru_costs <- function(TRACE, basic, hcru) { + +} \ No newline at end of file diff --git a/3_Functions/patient_flow/markov.R b/3_Functions/patient_flow/markov.R new file mode 100644 index 0000000..1feceec --- /dev/null +++ b/3_Functions/patient_flow/markov.R @@ -0,0 +1,938 @@ +#' Function to compute the TPs that populate matrix M using a set of extrapolations +#' which follow the structure resulting from using function `f_seq_extrapCollector` +#' to "collect" extrapolations. +#' +#' @param st_list the result of `f_seq_extrapCollector` (i.e. a set of extrapolated survival) +#' +#' @details cycles through treatment lines (`f_seq_extrapCollector` reduces down to those required) +#' pulling out the TTD, TTP (censoring for death) and PFS lines. These +#' are then used to compute transition probabilities between health states +#' for use within `M` later. For on-treatment states, next_on death_on +#' disc_on and stay_on are computed, and for off-treatment states +#' next_off death_off and stay_off are computed. This then creates +#' a full set of time-varying transition probabilities from time 0 +#' to the time horizon. For BSC, only OS is required. +#' +#' +#' +f_pf_mk_ComputeTPs <- function(st_list) { + out <- lapply(1:length(st_list), function(trt_li) { + + # Figure out the time horizon: + tl <- st_list[[trt_li]] + th <- length(tl$OS$st) + zeros <- rep(0,th) + + if (trt_li < length(st_list)) { + # Pull out the endpoints for ease of reading: + TTD <- tl$TTD$st + TTP <- tl$TTP$st + PFS <- tl$PFS$st + + # Calculate estimated transition probabilities out of those curves: + tp_TTD <- 1-(TTD/shift(TTD,fill=1)) + tp_TTP <- 1-(TTP/shift(TTP,fill=1)) + tp_PFS <- 1-(PFS/shift(PFS,fill=1)) + + # Apply assumptions to figure out TP from each state to each other state: + disc <- tp_TTD + + # ON-treatment transition probabilities + + # probability of death directly from on treatment state: + death_on <- 1 - tp_TTP - (1-tp_PFS) + + # Going directly onto next line from the on treatment state + + # The probability of discontinuation removing the probability of any other event + disc_only <- disc - tp_TTP - death_on + disc_only_adj <- pmax(disc_only, 0) + + # The probability of going to next therapy but not death directly from on-treatment + next_on <- tp_TTP - (disc_only_adj - disc_only) + + # the probability of staying on treatment is simply the inverse of the probability + # of going off treatment for any reason + stay_on <- 1 - disc + + # The probability of discontinuation when on treatment + disc_on <- disc_only_adj + + # Just to note some equivalence here: + # 1 - next_on - disc - stay_on == 1 - tp_TTP - (1-tp_PFS) + + next_off <- next_on + death_off <- death_on + stay_off <- 1 - death_on - next_on + + } else { + # Pull out the endpoints for ease of reading: + OS <- tl$OS$st + + # Calculate estimated transition probabilities out of those curves: + tp_OS <- 1-(OS/shift(OS,fill=1)) + + # Apply assumptions to figure out TP from each state to each other state: + stay_on <- 1-tp_OS + disc_on <- zeros + next_on <- zeros + death_on <- tp_OS + + next_off <- zeros + death_off <- zeros + stay_off <- zeros + + # Testing sum to 1: + + } + + # return a list of TPs for each of the 7 required to compute the Markov model + return(list( + disc_on = disc_on, + stay_on = stay_on, + next_on = next_on, + death_on = death_on, + stay_off = stay_off, + next_off = next_off, + death_off = death_off + )) + + }) + names(out) <- c(paste0("L",1:(length(st_list)-1)),"NT") + return(out) +} + + +#' Function to compute Markov traces when using a state-transition framework. +#' +#' @param pops string with e.g. 'pop_0' to run risk population 0. make sure those populations exist. +#' @param basic p$basic from p +#' @param sequences p$seq from p +#' @param survival p$surv from p +#' @param costs p$costs$mk from p +#' @param util List containing hsuv (p$util$mk) and gpop (p$util$gpop) +#' @param ae p$ae$mk from p +#' @param eff_table p$releff$excel_table from p +#' @param verbose whether or not the user wants verbose output whilst computing +#' @param include_plots Whether to generate trace plots or not +#' @param just_nlines optional. lets you run just one set of nlines (e.g. just 1 active treatment line or 2) +#' +f_pf_computePF_mk <- + function(pops, + basic, + demo, + sequences, + survival, + costs, + util, + ae, + eff_table, + verbose = FALSE, + include_plots = FALSE, + just_nlines = NULL, + just_seq = NULL) { + + # Compute the maximum active treatment lines possible by going through all sequences + # and figuring it out + max_active <- max(sapply(sequences$qc,ncol))-1 + + # pull out our lookup tables for cross referencing so it's automatic and easy + # to read + lookup <- basic$lookup + pop_map <- lookup$pop_map + + lapply(pops, function(popu) { + + # Get a number for the population for use subsetting and filtering stuff. + popu_n <- as.integer(gsub("pop_","",popu)) + + overall_pop <- as.list(pop_map[Overall.population.number == popu_n,]) + + f_misc_colcat( + paste0( + "ST model. Population ", + popu_n, + "\t| Sequences: '", + overall_pop$Sequencing.population, + "'\t | 1L Risk: '", + overall_pop$Risk.population, + "', further lines assumed all population" + ) + ) + # Map the population appropriately using pop_map as derived above: + + # Basically, get the risk and sequence populations and use those to extract + # the correct sequences and extrapolations, then go from there. + rpop_n <- overall_pop$Risk.population.number + rpop <- paste0("pop_",rpop_n) + + spop_n <- overall_pop$Sequencing.population.number + spop <- paste0("pop_",spop_n) + + # Pull out the demographic data for our patients. We can use this later + # (e.g. line 1 baseline age and sex for utiltiy adjustment) + # + # This is based on risk population - note these are baseline population + # and everyone is in the risk population at baseline! + demog <- demo[[rpop]] + + + # First-line included treatments for THIS risk population + L1_inc_rpop <- sort(unique(eff_table[Treatment.line == 1 & + Population == rpop_n & + Include.in.this.analysis. == "Yes", ]$Molecule)) + + # All further lines are risk population 0 only: + L2p_inc_rpop <- lapply(2:max_active, function(line_n) { + sort(unique(eff_table[Treatment.line == line_n & + Population == 0 & Include.in.this.analysis. == "Yes", ]$Molecule)) + }) + + + # Stick them together so that we can go through all the treatment sequences + # further reducing them down to those that are allowed via efficacy, NMAs + # and relative efficacy assumptions / exogenous data. + trt_filter <- c(list(L1_inc_rpop),L2p_inc_rpop) + rm(L1_inc_rpop) + rm(L2p_inc_rpop) + + # Get the treatment sequences for this population + ateachline <- sequences$n[[spop]] + ateachline$nlines <- ncol(ateachline)-rowSums(is.na(ateachline)) + + # Split them up by the number of treatment lines: + molecule_list_full <- lapply(2:(ncol(ateachline)-1), function(n_lines) { + ateachline[nlines==n_lines,1:n_lines] + }) + + # Go through each of these, filtering down using the per line allowable molecules. + # This then imposes exclusion via excel (i.e. data/assumption) as well as the rule set for providing + # each treatment sequence (i.e. policy). + + sequence_list <- lapply(molecule_list_full, function(lines_amount) { + Reduce( + x = 1:(ncol(lines_amount)-1), + init = lines_amount, + accumulate = FALSE, + f = function(prev, treatment_line) { + + # Find out which molecules are in that line column and filter the table + molecules_allowed <- trt_filter[[treatment_line]] + which_to_allow <- which(prev[[treatment_line]] %in% molecules_allowed) + return(prev[which_to_allow,]) + } + ) + }) + + # User can make it just run one set of nlines. mainly for QC + if (!is.null(just_nlines)) { + sequence_list <- sequence_list[just_nlines] + names(sequence_list) <- paste0("active_lines_",just_nlines) + } else { + names(sequence_list) <- paste0("active_lines_",1:length(sequence_list)) + } + + # Before we cycle down the individual treatment sequences, we need to generate + # the appropriate utility multiplier to be applied to health state utility values + # to account for the ageing of the population. This vector is then compiled into + # a matrix with TH rows and the maximum possible number of columns in the + # consolidated trace. This matrix can then be multiplied by the dagonal + # matrix of HSUVs relevant to a sequence to produce the HSUV matrix accounting for + # ageing. This matrix can be element-wise multiplied by the consolidated trace + # to produce undiscounted QALYs. The undiscounted QALYs can be discounted + # using the discount factor in basic$discFacQ to produce the discounted version + + + # Generate gpop line + util_gpop_mat <- matrix( + util$gpop[[rpop]], # R will recycle this automatically + nrow = basic$th+1, + ncol = (max_active*2) +1 + 1 + ) + + + # Each one of these sequences requires a Markov trace and a unique id. + # when qcing: amount_of_lines <- sequence_list[[1]] + + return(lapply(sequence_list, function(amount_of_lines) { + + cat(paste0("M: pathways with ", ncol(amount_of_lines)-1, " active treatment lines \n")) + + seq_id <- 1:nrow(amount_of_lines) + names(seq_id) <- paste0("seq_",seq_id) + + if (!is.null(just_seq)) { + seq_id <- seq_id[just_seq] + } + + with_progress({ + pr <- progressr::progressor(along = seq_id, message = paste0("Pop ", popu, ", ", ncol(amount_of_lines)-1, " ATLs:")) + future_lapply(seq_id, future.chunk.size = 1, function(seq_n) { + + # lapply(seq_id, function(seq_n) { + number_of_lines <- ncol(amount_of_lines) + + # Pull out this specific treatment sequence: + trt_seq <- structure(paste0("mol_",amount_of_lines[seq_n,]),.Names=names(amount_of_lines[seq_n,])) + trts_n <- as.integer(gsub("mol_","",trt_seq)) + + pr(paste0("sequence: ",paste(trts_n,collapse = "|"))) + + trt_seq_plotLab <- trt_seq + names(trt_seq_plotLab) <- gsub("line_","L",names(trt_seq)) + names(trt_seq_plotLab)[length(names(trt_seq_plotLab))] <- "NT" + + treatment_names <- basic$lookup$ipd$mol[match(trts_n,basic$lookup$ipd$mol$Number),Description] + treatment_abbr <- basic$lookup$ipd$mol[match(trts_n,basic$lookup$ipd$mol$Number),RCC_input_desc] + + if (verbose) f_misc_colcat( + paste0( + "M: ", + ncol(amount_of_lines) - 1, + " active line(s). seq: ", + seq_n, + " - ", + paste(paste(trt_seq,treatment_names, collapse = " -> "), collapse = " ") + ), + 31 + ) + + # Collect the extrapolations that are available for that sequence + # + # IMPORTANT NOTE: THIS FUNCTION USES POP 0 FOR ALL 2L+ LINES!!! + # + # Note the pop_n argument is numeric and for RISK POPULATION + extraps <- f_seq_extrapCollector( + treatment_sequence = trt_seq, + st = survival$st, + lookups = lookup, + pop_n = rpop_n, + pop_0_2Lp = TRUE + ) + + # Calculate what we need + TPs <- f_pf_mk_ComputeTPs(st_list = extraps$st) + + # Correct for non-finite TPs: + TPs <- lapply(TPs, function(li) { + lapply(li, function(tp) { + tp[!is.finite(tp)] <- 0 + if (length(tp < (basic$th+1))) { + tp <- c(tp,rep(0,basic$th+1 - length(tp))) + } + tp + }) + }) + + # Corrected time horizon: + th <- length(TPs$L1$disc_on) + + # death is certain at the end of the time horizon. impossible to get here + # for any 2L+ state anyway since it takes 1 cycle to get there and therefore + # the time horizon + line - 1 is the last cycle possible. + TPs <- lapply(TPs, function(li) { + if ("death_on" %in% names(li)) li$death_on[th] <- 1 + if ("death_off" %in% names(li)) li$death_off[th] <- 1 + return(li) + }) + + # For computing L1 off trt entrants: + L1_onoff_adjustment <- TPs$L1$death_on + TPs$L1$next_on + + # Compile M using these TPs + + M <- f_markov_M_compiler_TPupdate( + tp = TPs, + n_lines = number_of_lines-1, + TH = th, + verbose = verbose + ) + + # Compute Markov trace + TRACE <- f_markov_M_extrapolator( + L1_tp = TPs$L1, + TH = th, + M = M, + N = f_markov_calcN(n_lines = number_of_lines-1, TH = th), + prog = verbose, + verbose = verbose + ) + + # discounted trace for the costs (diag.M is rowwise, M.diag is columnwise) + DTRACE <- basic$discFacC * TRACE + + # column-wise recycling tested per the below: + # DTRACE2 <- Diagonal(length(basic$discFacC),basic$discFacC) %*% TRACE + # identical(DTRACE,DTRACE2) + # rm(DTRACE2) + + # we can drop M to save memory, as we don't use it again + rm(M) + rm(TPs) + + # Generates entrant list + + # Return the reduced traces per the model's requirements. + consolidated_trace <- f_markov_traceConsolidator( + full_trace = TRACE, + split_list = NULL, + L1_onoff_adjustment = L1_onoff_adjustment, + TH = th, + n_lines = number_of_lines-1, + discFacQ = basic$discFacQ, + discFacC = basic$discFacC + ) + + # Here we will use TRACE (the expanded trace) to compute drug costs + # and mru costs because they are too complicated to multiply by simple + # numbers. + # by line and treatment status (and component for HCRU + drug costs). + # + # This will replace returning TRACE in the output + + if (verbose) cat("Computing model outcomes per cycle per cycle-in-state") + + # rename the colstarts to something we can refer to automatically: + names(consolidated_trace$col_starts) <- Reduce( + x = 1:(length(trt_seq)-1), + init = names(consolidated_trace$col_starts), + accumulate = FALSE, + f = function(prev, li) { + gsub(paste0("L",li),trt_seq[li],prev) + } + ) + names(consolidated_trace$col_starts)[which(names(consolidated_trace$col_starts) == "BSC")] <- "mol_999_on" + + # cycle through the treatment sequence, computing the different cost components: + + # abbreviate cost per cycle (cpc) and costs on initation (coi). we only need the + # mols which we are using for this sequence. one off are for all. + cpc <- costs$per_cycle + coi <- costs$one_off$Prog + cod <- costs$one_off$Death + + pf_costs <- lapply(1:length(trt_seq), function(line_n) { + mol <- trt_seq[line_n] + + if (line_n == 1) { + # First line, we don't need to do tunnel multiplication: + + tr_1l_on <- TRACE[,1] + tr_1l_off <- TRACE[,2] + + drug_cost <- cpc$drug[[names(mol)]][[mol]] * tr_1l_on + admin_cost <- cpc$admin[[names(mol)]][[mol]] * tr_1l_on + ruc_on <- cpc$mru_on[[names(mol)]][[mol]] * tr_1l_on + ruc_off <- cpc$mru_off[[names(mol)]][[mol]] * tr_1l_off + + rm(tr_1l_on) + rm(tr_1l_off) + + # Discounted version: + dtr_1l_on <- DTRACE[,1] + dtr_1l_off <- DTRACE[,2] + + ddrug_cost <- cpc$drug[[names(mol)]][[mol]] * dtr_1l_on + dadmin_cost <- cpc$admin[[names(mol)]][[mol]] * dtr_1l_on + druc_on <- cpc$mru_on[[names(mol)]][[mol]] * dtr_1l_on + druc_off <- cpc$mru_off[[names(mol)]][[mol]] * dtr_1l_off + + rm(dtr_1l_on) + rm(dtr_1l_off) + + } else if (line_n < length(trt_seq)) { + # For the central states we define diagonal matrices, multiply + # by the appropriate columns in TRACE, then calculate rowsums! + + # Add in one-off costs upon entering this treatment line: + mru_on_tx <- cpc$mru_on[[names(mol)]][[mol]] + mru_on_tx[1] <- mru_on_tx[1] + coi + + dc <- Diagonal(n = th, x = cpc$drug[[names(mol)]][[mol]]) + ac <- Diagonal(n = th, x = cpc$admin[[names(mol)]][[mol]]) + ru_on <- Diagonal(n = th, x = mru_on_tx) + ru_off <- Diagonal(n = th, x = cpc$mru_off[[names(mol)]][[mol]]) + + start_col <- consolidated_trace$col_starts[grep(paste0(mol,"_"),names(consolidated_trace$col_starts))] + on_cols <- start_col[grep("_on",names(start_col))]:(start_col[grep("_on",names(start_col))] + th - 1) + off_cols <- start_col[grep("_off",names(start_col))]:(start_col[grep("_off",names(start_col))] + th - 1) + + tr_on <- TRACE[,on_cols] + + drug_cost <- rowSums(tr_on %*% dc) + admin_cost <- rowSums(tr_on %*% ac) + ruc_on <- rowSums(tr_on %*% ru_on) + ruc_off <- rowSums(TRACE[,off_cols] %*% ru_off) + + rm(tr_on) + + dtr_on <- DTRACE[,on_cols] + + ddrug_cost <- rowSums(dtr_on %*% dc) + dadmin_cost <- rowSums(dtr_on %*% ac) + druc_on <- rowSums(dtr_on %*% ru_on) + druc_off <- rowSums(DTRACE[,off_cols] %*% ru_off) + + rm(dtr_on) + rm(dc) + rm(ac) + rm(ru_on) + rm(ru_off) + rm(start_col) + rm(on_cols) + rm(off_cols) + + } else { + + # Future switch? Add in one-off costs upon entering this treatment line: + # In future, link this to the excel switch for applying this cost + # upon starting BSC. + if (TRUE) { + # Sometimes BSC is coming after any active treatment line. + # The cost should come from the last active treatment line's BSC + # values: + if (is.null(cpc$mru_on[[names(mol)]])) { + # reduce line id by 1 just for pulling out the right data + # from cpc: + namline_n <- as.numeric(gsub("line_","",names(mol))) + line_nam_temp <- paste0("line_",namline_n-1) + + # Now use this adjusted name to pull out the right values + mru_on_tx <- cpc$mru_on[[line_nam_temp]][[mol]] + } else { + mru_on_tx <- cpc$mru_on[[names(mol)]][[mol]] + } + # Now add the cost on initiation (coi) + mru_on_tx[1] <- mru_on_tx[1] + coi + } + + # we're in BSC, we do the same as above but have to pull some of the costs + # for BSC from previous line, and there's no drug costs + ru_on <- Diagonal(n = th, x = mru_on_tx) + + drug_cost <- rep(0,th) + admin_cost <- rep(0,th) + ddrug_cost <- rep(0,th) + dadmin_cost <- rep(0,th) + + start_col <- consolidated_trace$col_starts[grep(mol,names(consolidated_trace$col_starts))] + on_cols <- start_col:(start_col + th - 1) + + ruc_on <- rowSums(TRACE[,on_cols] %*% ru_on) + druc_on <- rowSums(DTRACE[,on_cols] %*% ru_on) + ruc_off <- rep(0,th) + druc_off <- ruc_off + + rm(ru_on) + rm(on_cols) + } + + # Ok now no matter how many lines and their order we've done drug cost + # with full memory for our treatment sequence -_- + + return(list( + molecule = mol, + undisc = list( + drug = drug_cost, + admin = admin_cost, + mru_on = ruc_on, + mru_off = ruc_off + ), + disc = list( + drug = ddrug_cost, + admin = dadmin_cost, + mru_on = druc_on, + mru_off = druc_off + ) + )) + }) + names(pf_costs) <- names(trt_seq) + + + # We now no longer need TRACE (or discounted trace) and can drop it to free up (a lot of) memory + rm(TRACE) + rm(DTRACE) + rm(cpc) + rm(coi) + + # Finally, add in end of life costs by multiplying the entrants to death from consolidated + # trace by the cost upon death + pf_costs$eol <- list( + undisc = consolidated_trace$entrants[,ncol(consolidated_trace$entrants)] * cod, + disc = consolidated_trace$disc$C$entrants[,ncol(consolidated_trace$disc$C$entrants)] * cod + ) + rm(cod) + + # QALYs and AEs are calculated using the consolidated traces: + # pf_qalys + + # Get the HSUVs for the active treatment line by treatment status: + hsuv_active <- unlist(lapply(1:(number_of_lines-1), function(line_n) { + + # ASSUMEs POPULATION 0 IF LATER LINES!!!! + if (line_n == 1) { + line_hsuv <- as.list(util$hsuv[Population == rpop_n & Treatment.line == line_n & Molecule == trts_n[line_n],list(OnTxt,OffTxt)]) + } else { + line_hsuv <- as.list(util$hsuv[Population == 0 & Treatment.line == line_n & Molecule == trts_n[line_n],list(OnTxt,OffTxt)]) + } + + lab_line <- paste0("L",line_n) + names(line_hsuv) <- paste0(lab_line,c("_on","_off")) + unlist(line_hsuv) + })) + # Get the HSUV for BSC: + hsuv_bsc <- structure( + util$hsuv[Population == 0 & Treatment.line == number_of_lines & Molecule == trts_n[number_of_lines],]$OnTxt, + .Names = paste0("L",number_of_lines,"_on") + ) + + # Stick them together to form the vector of HSUVs per possible state for + # this treatment sequence: + hsuv_unadj <- c(hsuv_active,hsuv_bsc,"dead"=0) + + # Expand this to the time horizon by multiplying column-wise by the gpop line. + # The result is U, which we can element-wise multiply by our consolidated trace + # to get our undiscounted QALYs :) + U <- util_gpop_mat[,1:length(hsuv_unadj)] %*% diag(hsuv_unadj) + + rm(hsuv_active) + rm(hsuv_bsc) + rm(hsuv_unadj) + + # Undiscounted QALYs (note AEs are separate as they're a separate thing): + pf_qalys <- list( + undisc = (consolidated_trace$full_lines * basic$cl_y) * U, + disc = (consolidated_trace$disc$Q$full_lines * basic$cl_y) * U + ) + + rm(U) + + # AEs undiscounted + + # pf_ae + # Same as above, compute your AE stuff here + + if(ae$approach[1] == "one-off") { + + ae_impact_atl <- do.call( + rbind, + lapply(1:(number_of_lines-1), function(line_n) { + c(cost = ae$per_cycle[ae$per_cycle$trt == treatment_abbr[line_n] & ae$per_cycle$line == line_n]$cost, + qaly = ae$per_cycle[ae$per_cycle$trt == treatment_abbr[line_n] & ae$per_cycle$line == line_n]$QALYs, + dur = mean( + ae$one_off$duration_weeks[ + ae$one_off$Molecule == trts_n[line_n] + & ae$one_off$Treatment.line == min(line_n,2) + ])) + }) + ) + + ae_impact_atl <- data.table(ae_impact_atl) + ae_oo_cost <- ae_impact_atl$cost * ae_impact_atl$dur + ae_oo_qaly <- ae_impact_atl$qaly * ae_impact_atl$dur + + ent <- data.table(as.matrix(consolidated_trace$entrants)) + ent <- rbindlist(list(data.frame(t(structure(rep(0,ncol(ent)),.Names=colnames(ent)))),ent)) + ent <- cbind(L1_on = c(1,rep(0,nrow(ent)-1)),ent) + + dent_c <- data.table(as.matrix(consolidated_trace$disc$C$entrants)) + dent_c <- rbindlist(list(data.frame(t(structure(rep(0,ncol(dent_c)),.Names=colnames(dent_c)))),dent_c)) + dent_c <- cbind(L1_on = c(1,rep(0,nrow(dent_c)-1)),dent_c) + + dent_q <- data.table(as.matrix(consolidated_trace$disc$Q$entrants)) + dent_q <- rbindlist(list(data.frame(t(structure(rep(0,ncol(dent_q)),.Names=colnames(dent_q)))),dent_q)) + dent_q <- cbind(L1_on = c(1,rep(0,nrow(dent_q)-1)),dent_q) + + nonzero_ind <- 1:number_of_lines + 0:(number_of_lines-1) + + nstate <- ncol(consolidated_trace$full_lines) + n_cyc <- nrow(consolidated_trace$full_lines) + nam_st <- colnames(consolidated_trace$full_lines) + + # clever trick to expand a vector + ae_oo_cost <- "[<-"(numeric(nstate), nonzero_ind, c(ae_oo_cost,0)) + ae_oo_qaly <- "[<-"(numeric(nstate), nonzero_ind, c(ae_oo_qaly,0)) + + # multiply payoff by entrants + pf_ae <- list( + undisc = list( + costs = matrix( + as.matrix(ent)[1:th,] %*% diag(ae_oo_cost), + ncol = nstate, + nrow = n_cyc, + dimnames = list(NULL,nam_st) + ), + qalys = matrix( + as.matrix(ent)[1:th,] %*% diag(ae_oo_qaly), + ncol = nstate, + nrow = n_cyc, + dimnames = list(NULL,nam_st) + ) + ), + disc = list( + costs = matrix( + as.matrix(dent_c)[1:th,] %*% diag(ae_oo_cost), + ncol = nstate, + nrow = n_cyc, + dimnames = list(NULL,nam_st) + ), + qalys = matrix( + as.matrix(dent_q)[1:th,] %*% diag(ae_oo_qaly), + ncol = nstate, + nrow = n_cyc, + dimnames = list(NULL,nam_st) + ) + ) + ) + + + } else { + # compute cost per cycle on treatment and qalys lost per cycle on treatment: + ae_cpc_ontrt <- unlist(lapply(1:number_of_lines, function(act_line_n) { + as.list(ae$per_cycle[line == act_line_n & molecule == trts_n[act_line_n],])$cost + })) + ae_qlpc_ontrt <- unlist(lapply(1:number_of_lines, function(act_line_n) { + as.list(ae$per_cycle[line == act_line_n & molecule == trts_n[act_line_n],])$QALYs + })) + # Make a correction for if we have 4 atl's: + if (number_of_lines == max_active+1) { + ae_cpc_ontrt[5] <- ae$per_cycle[line == max_active & molecule == trts_n[number_of_lines],]$cost + ae_qlpc_ontrt[5] <- ae$per_cycle[line == max_active & molecule == trts_n[number_of_lines],]$QALYs + } + nonzero_ind <- 1:number_of_lines + 0:(number_of_lines-1) + + # pad the cpc and qlpc with 0s: + ae_cpc <- "[<-"(numeric(ncol(consolidated_trace$full_lines)), nonzero_ind, ae_cpc_ontrt) + ae_qlpc <- "[<-"(numeric(ncol(consolidated_trace$full_lines)), nonzero_ind, ae_qlpc_ontrt) + + names(ae_cpc) <- colnames(consolidated_trace$full_lines) + names(ae_qlpc) <- colnames(consolidated_trace$full_lines) + + # Implement adverse events (yay) + pf_ae <- list( + undisc = list( + costs = matrix( + consolidated_trace$full_lines %*% diag(ae_cpc), + ncol = ncol(consolidated_trace$full_lines), + nrow = nrow(consolidated_trace$full_lines), + dimnames = list(NULL,colnames(consolidated_trace$full_lines)) + ), + qalys = matrix( + consolidated_trace$full_lines %*% diag(ae_qlpc), + ncol = ncol(consolidated_trace$full_lines), + nrow = nrow(consolidated_trace$full_lines), + dimnames = list(NULL,colnames(consolidated_trace$full_lines)) + ) + ), + disc = list( + costs = matrix( + consolidated_trace$disc$C$full_lines %*% diag(ae_cpc), + ncol = ncol(consolidated_trace$full_lines), + nrow = nrow(consolidated_trace$full_lines), + dimnames = list(NULL,colnames(consolidated_trace$full_lines)) + ), + qalys = matrix( + consolidated_trace$disc$Q$full_lines %*% diag(ae_qlpc), + ncol = ncol(consolidated_trace$full_lines), + nrow = nrow(consolidated_trace$full_lines), + dimnames = list(NULL,colnames(consolidated_trace$full_lines)) + ) + ) + ) + } + + if(include_plots) { + f_plot_mk_draw_consol_trace(consol_trace = consolidated_trace$full_lines, + treatment_names = treatment_names, + tmax = 15) + } + + + return(list( + population = popu, + trt_nam = treatment_names, + trace_consol = consolidated_trace$full_lines, + costs = pf_costs, + qalys = pf_qalys, + aes = pf_ae + )) + }) + }) + + + + })) + }) + +} + + + +# Summary functions ------------------------------------------------------- + + +#' Function which takes the output of the state transition model and +#' sums up to various different levels which are optional. For e.g. a PSA, +#' only the top line should be returned, or if drug cost is required to be separated +#' from total costs, then breakdown should be true. In a full deterministic analysis +#' full breakdowns can inform the tables which will go into reporting. +#' +#' @param pf_list the output from the state transition model (i.e. `pf$mk`) +#' @param disc_undisc either `disc` or `undisc` as text to tell the model to return discounted or undiscounted results +#' @param lookups the lookup tables we have used throughout the model (i.e. `p$basic$lookup`) +#' @param full_breakdown logical. if TRUE (default), return results by sequence, category and treatment line +#' @param breakdown logical. if TRUE (default), return results by sequence and category in one neat table +#' @param ypc years per cycle - used when "undisc" is entered for disc_undisc. `p$basic$cl_y` +#' +#' +f_pf_mk_summary <- function(pf_list, disc_undisc,lookups, full_breakdown = TRUE, breakdown = TRUE, ypc) { + + stopifnot(disc_undisc %in% c("disc","undisc")) + + if (disc_undisc == "disc") { + items_to_get <- c("trt_nam", "disc") + } else { + items_to_get <- c("trt_nam", "undisc", "trace_consol") + } + + # Tip: to get the first popu, do this: + # popu <- get_elem(pf$mk,items_to_get)[[1]] + lapply(get_elem(pf_list,items_to_get), function(popu) { + + index_names <- Reduce( + x = 1:length(popu), + init = lapply(popu,length), + accumulate = FALSE, + f = function(prev, this_line) { + prev[[this_line]] <- 1:prev[[this_line]] + names(prev[[this_line]]) <- paste0("seq_",prev[[this_line]]) + if (this_line > 1) { + prev[[this_line]] <- max(prev[[this_line]]) + prev[[this_line - 1]] + } + return(prev) + } + ) + + # Now collapse the list by one level + fixed_names <- Reduce( + x = names(popu), + init = list(), + accumulate = FALSE, + f = function(prev, lines_txt) { + out <- popu[[lines_txt]] + names(out) <- paste0("seq_",index_names[[lines_txt]]) + prev <- c(prev,out) + return(prev) + } + ) + + n_seq <- length(fixed_names) + + # Costs: + # Now we can just cycle through this flattened list summarising one at a time + # this_sequence <- fixed_names[[1]] + cost_full_breakdown <- lapply(fixed_names, function(this_sequence) { + costs_less_aes <- do.call( + rbind,lapply(this_sequence$costs, function(tx_line) { + if(class(tx_line) == "numeric") { + sum(tx_line) + } else { + unlist(lapply(tx_line,sum)) + } + }) + ) + + eol <- costs_less_aes["eol","mru_on"] + costs_less_aes <- costs_less_aes[paste0("line_",1:(length(this_sequence$trt_nam))),] + + costs_ae <- colSums(this_sequence$aes$costs) + costs_ae <- unlist(lapply(1:(length(this_sequence$trt_nam)), function(tx_n) { + if (tx_n < length(this_sequence$trt_nam)) { + sum(costs_ae[which(names(costs_ae) %in% paste0("L",tx_n,c("_on","_off")))]) + } else { + as.numeric(costs_ae["BSC"]) + } + })) + + return(cbind( + costs_less_aes, + ae_cost = costs_ae, + eol = c(rep(0,length(this_sequence$trt_nam)-1),eol) + )) + }) + cost_breakdown <- do.call(rbind,lapply(cost_full_breakdown,colSums)) + cost_total <- rowSums(cost_breakdown) + + # Now do the same with QALYs + qaly_full_breakdown <- lapply(fixed_names, function(this_sequence) { + qaly_less_aes <- matrix(colSums(this_sequence$qalys),ncol=1,dimnames = list(colnames(this_sequence$qalys),"qaly")) + qaly_ae <- matrix(colSums(this_sequence$aes$qalys),ncol=1,dimnames = list(colnames(this_sequence$aes$qalys),"ae_qaly")) + return(cbind(qaly_less_aes,qaly_ae)) + }) + qaly_breakdown <- do.call(rbind,lapply(qaly_full_breakdown,colSums)) + qaly_total <- rowSums(qaly_breakdown) + + out <- list() + if (full_breakdown) out$full_breakdowns <- list() + if (breakdown) out$breakdowns <- list() + + # make some unique id's for each sequence: + trt_n <- unlist(lapply(lapply(fixed_names, function(trt_seq) trt_seq$trt_nam), function(drug_names) { + paste(lookups$ipd$mol[match(drug_names,lookups$ipd$mol$Description),]$Number,collapse = "→") + })) + trt_numb <- lapply(lapply(fixed_names, function(trt_seq) trt_seq$trt_nam), function(drug_names) { + lookups$ipd$mol[match(drug_names,lookups$ipd$mol$Description),]$Number + }) + trt_txt <- unlist(lapply(fixed_names, function(x) paste(x$trt_nam, collapse="→")),use.names = FALSE) + + + # Now, if we're returning full breakdowns, we want to go by sequence, returning + # costs and qalys as 2 objects. + + if (full_breakdown) out$full_breakdowns <- lapply(structure(1:n_seq,.Names=paste0("seq_",1:n_seq)), function(this_seq) { + list( + n = trt_n[this_seq], + numb = trt_numb[[this_seq]], + txt = trt_txt[this_seq], + cost = cost_full_breakdown[[this_seq]], + qaly = qaly_full_breakdown[[this_seq]] + ) + }) + if (breakdown) { + out$breakdowns <- data.table( + trt_n = trt_n, + trt = trt_txt, + cost_breakdown, + qaly_breakdown + ) + + } + + out$res <- data.table( + trt_n = trt_n, + trt = trt_txt, + costs = cost_total, + qalys = qaly_total + ) + + if (disc_undisc == "undisc") { + life_years <- lapply(fixed_names, function(this_sequence) { + colSums(this_sequence$trace_consol * ypc)[1:(ncol(this_sequence$trace_consol)-1)] + }) + if (breakdown) { + out$ly <- list() + out$ly$breakdown <- data.table( + trt_n = trt_n, + trt = trt_txt, + rbindlist(lapply(life_years, function(x)data.table(t(data.frame(x)))),fill = TRUE) + ) + } + out$res$ly <- unlist(lapply(life_years,sum)) + } + + # Return the results table! + return(out) + + }) +} + diff --git a/3_Functions/patient_flow/overarching.R b/3_Functions/patient_flow/overarching.R new file mode 100644 index 0000000..aec2bf3 --- /dev/null +++ b/3_Functions/patient_flow/overarching.R @@ -0,0 +1,73 @@ +#' Function to compute "patient flow" i.e. either a Markov trace or state transition +#' This top level function simply routes to the appropriate underlying pf generating +#' function. Either for a state transition or ps model. +#' +#' @param p object p +#' @param struct the model structure. must be one of the dropdown options for named range i$dd_model_struct +#' @param verbose whether to print more to the console or not +#' @param plots whether to generate plots in the output +#' @param just_pop just run a subset of the risk populations. default is to run them ALL (a lot of iteration!) +#' @param just_nlines just run a subset of the pathways according to how many ACTIVE treatment lines. Markov only. +#' +f_pf_computePF <- + function(p, + struct = "State transition", + verbose = FALSE, + plots = FALSE, + just_pop = NULL, + just_nlines = NULL, + just_seq = NULL) { + + + # list the populations to simulate: + if (!is.null(just_pop)) { + if(0%in% just_pop) stop ("this is overall population, not risk population, it can't be 0.") + overall_pops <- structure(paste0("pop_",just_pop),.Names=paste0("pop_",just_pop)) + } else { + overall_pops <- structure( + paste0("pop_",p$basic$lookup$pop_map$Overall.population.number), + .Names=paste0("pop_",p$basic$lookup$pop_map$Overall.population.number) + ) + } + + # map pops for risk for the demographics: + rpop <- paste0("pop_",p$basic$lookup$pop_map[match(as.numeric(gsub("pop_","",overall_pops)),p$basic$lookup$pop_map$Overall.population.number),]$Risk.population.number) + + + + # If the dropdown is broken, then return an error + if(!struct %in% c("State transition", "Partitioned survival")) { + stop ("structure must be at least one of the dropdown options in Excel (named range List_model_structures)") + } + + # Otherwise it has to be one of the below: + if (struct == "State transition") { + out <- f_pf_computePF_mk( + pops = overall_pops, + basic = p$basic, + demo = p$demo$live[which(names(p$demo$live) %in% rpop)], + sequences = p$seq, + survival = p$surv, + costs = list(per_cycle = p$costs$mk, one_off = p$costs$oneoff_mk), + util = list(hsuv = p$util$mk, gpop = p$util$gpop), + ae = list(one_off = p$ae$duration, per_cycle = p$ae$mk$per_cycle, approach = p$ae$approach), + eff_table = p$releff$excel_table, + verbose = verbose, + include_plots = plots, + just_nlines = just_nlines, + just_seq = just_seq + ) + } else { + out <- f_pf_computePF_ps( + pops = overall_pops, + basic = p$basic, + p = p, + cyclecosts = p$costs$mk, + oneoffcosts = p$costs$oneoff, + util = list(hsuv = p$util$mk, gpop = p$util$gpop), + ae = list(one_off = p$ae$duration, per_cycle = p$ae$mk$per_cycle, approach = p$ae$approach), + substrt = p$substrt + ) + } + return(out) +} \ No newline at end of file diff --git a/3_Functions/patient_flow/partitioned_survival.R b/3_Functions/patient_flow/partitioned_survival.R new file mode 100644 index 0000000..2883cad --- /dev/null +++ b/3_Functions/patient_flow/partitioned_survival.R @@ -0,0 +1,339 @@ +#' Function for patient flow for state residency for partitioned survival for active treatment lines +#' +#' @param st_for_1L survival s at time t for first-line, in the list structure using names OS PFS TTD for endpoints +#' +f_pf_sr_ps_atl <- function(st_for_1L) { + + s <- get_elem(st_for_1L,"st") + + # compute % in each possible state at each point in time for this 1L treatment, + # creating a partitioned survival model for the 1L+ for this sequence + + PFS_on <- c(1,pmin(s$PFS, s$TTD))[1:length(s$OS)] + PFS_off <- c(0,pmax(s$PFS - s$TTD,0))[1:length(s$OS)] + PPS_on <- c(0,pmax(s$TTD - s$PFS,0))[1:length(s$OS)] + PPS_off <- c(0,s$OS - pmax(s$TTD, s$PFS))[1:length(s$OS)] + dead <- c(0,1-s$OS)[1:length(s$OS)] + + stopifnot(f_misc_all_same_length(PFS_on, PFS_off, PPS_on, PPS_off, dead)) + + matrix( + data = c(PFS_on, PFS_off, PPS_on, PPS_off, dead), + ncol = 5, + dimnames = list(NULL,c( + "PFS_on", + "PFS_off", + "PPS_on", + "PPS_off", + "dead" + )) + ) +} + + + + +#' Function to compute state residency for use in partitioned survival modelling +#' +#' +#' +f_pf_partSA_state_res <- + function(treatment_sequence, + st, + t_yr, + lookup, + pop_n, + line_n, + discFacC, + discFacQ, + include_plot = FALSE, + plot_x_lim = 20) { + + + # Collect the correct extrapolations + extraps <- f_seq_extrapCollector( + treatment_sequence = treatment_sequence, + st = st, + lookups = lookup, + pop_n = pop_n + ) + + # Create an empty list to house the output and populate it as we go to avoid + # repeatedly copy pasting things + out <- list() + + # compute undiscounted state residency in each state at first-line ONLY + out$sr$undisc <- as.list(as.data.table(f_pf_sr_ps_atl(st_for_1L = extraps$st[[paste0("line_", line_n)]]))) + + # Compute discounted state residency for the two different discount rates: + out$sr$disc <- list( + C = lapply(out$sr$undisc, function(endp){ + endp * discFacC + }), + Q = lapply(out$sr$undisc, function(endp){ + endp * discFacQ + }) + ) + + # Calculate the proportion that will die every cycle per OS for our 1L+ model: + out$sr$undisc$ddeath_dt <- c(0,diff(out$sr$undisc$dead)) + out$sr$disc$C$ddeath_dt <- out$sr$undisc$ddeath_dt * discFacC + + # Calculate the proportion that progress every cycle for our 1L+ model: + # This assumes deaths are distributed evenly between progression free and progressed health states + prop_deathinPFS <- p$surv$prop_deathinPFS # proportion of PFS events which are death + progtotal <- out$sr$undisc$PFS_on + out$sr$undisc$PFS_off + out$sr$undisc$prog_dt <- c(0, diff(-progtotal)) * (1 - prop_deathinPFS) + out$sr$disc$C$prog_dt <- out$sr$undisc$prog_dt * discFacC + out$sr$disc$Q$prog_dt <- out$sr$undisc$prog_dt * discFacQ + + # The state PPS_off is then the proportion that are in subsequent lines of treatment + # beyond the first. + + if(include_plot) out$sr_plot <- f_plot_srPlot(out$sr$undisc,t_yr, plot_x_lim) + + return(out) + +} + + +f_pf_computePF_ps <- function(pops, basic, p, cyclecosts, oneoffcosts, util, ae, substrt){ + + # pull out our lookup tables for cross referencing so it's automatic and easy + # to read + lookup <- basic$lookup + pop_map <- lookup$pop_map + + + lapply(pops, function(popu) { + + # Get a numeric index for population for the functions, whilst also + # preserving the auto naming by the lapply: + popu_n <- as.integer(gsub("pop_","",popu)) + cat(paste0("population: ", popu_n, "\n")) + + overall_pop <- as.list(pop_map[Overall.population.number == popu_n,]) + + f_misc_colcat( + paste0( + "PartSA model. Population ", + popu_n, + "\t| Sequences: '", + overall_pop$Sequencing.population, + "'\t | 1L Risk: '", + overall_pop$Risk.population, + "', further lines assumed all population" + ) + ) + # Map the population appropriately using pop_map as derived above: + + # Basically, get the risk and sequence populations and use those to extract + # the correct sequences and extrapolations, then go from there. + rpop_n <- overall_pop$Risk.population.number + rpop <- paste0("pop_",rpop_n) + + spop_n <- overall_pop$Sequencing.population.number + spop <- paste0("pop",spop_n) + + # Pull out the demographic data for our patients. We can use this later + # (e.g. line 1 baseline age and sex for utiltiy adjustment) + # + # This is based on risk population - note these are baseline population + # and everyone is in the risk population at baseline! + demog <- p$demo$live[[rpop]] + + # Generate gpop line + util_gpop_mat <- matrix( + util$gpop[[rpop]], # R will recycle this automatically + nrow = basic$th+1, + ncol = (1*2) +1 + 1 # only one line of treatment in the PartSA model + ) + + # make a vector to apply costs and QALYs for AEs as one off + + first_cycle <- rep(0,basic$th+1) + first_cycle[1] <- 1 + + # Make an empty list to populate with the results: + sm <- list( + sr = NULL, + trt_1L = unique(p$releff$excel_table[Treatment.line == 1 & Population == rpop_n & Include.in.this.analysis. == "Yes",]$Molecule) + ) + + # Further filter down the list of 1st line treatments that are available using + # the output of the sequencing + sm$trt_1L <- sm$trt_1L[sm$trt_1L %in% unique(p$seq$n[[paste0("pop_",spop_n)]]$line_1)] + + + # generate state residency by treatment status for all of these 1L treatments: + PartSA_pf <- lapply(1:length(sm$trt_1L), function(fl_tx) { + + tx_seq <- c("line_1" = paste0("mol_",sm$trt_1L[fl_tx])) + mol_num <- sm$trt_1L[fl_tx] + + cat(paste0("molecule: ", tx_seq[1],"\n")) + mol_name <- p$basic$lookup$ipd$mol[match(mol_num ,p$basic$lookup$ipd$mol$Number),RCC_input_desc] + + sr <- f_pf_partSA_state_res( + treatment_sequence = tx_seq, + st = p$surv$st, + t_yr = p$basic$t_yr, + lookup = p$basic$lookup, + pop_n = rpop_n, + line_n = 1, + discFacC = p$basic$discFacC, + discFacQ = p$basic$discFacQ, + include_plot = TRUE, + plot_x_lim = 20 + ) + + # Undiscounted costs + + drug_cost <- cyclecosts$drug[[1]][[tx_seq ]] * (sr$sr$undisc$PFS_on + sr$sr$undisc$PPS_on) + admin_cost <- cyclecosts$admin[[1]][[tx_seq ]] * (sr$sr$undisc$PFS_on + sr$sr$undisc$PPS_on) + + substrt_admin_cost <- substrt$partsa[Treatment==mol_name & Population == spop, admin_cost] * sr$sr$undisc$prog_dt + substrt_drug_cost <- substrt$partsa[Treatment==mol_name & Population == spop, drug_cost] * sr$sr$undisc$prog_dt + substrt_AE_cost <- substrt$partsa[Treatment==mol_name & Population == spop, AE_cost] * sr$sr$undisc$prog_dt + + ruc_preprog <- cyclecosts$mru_on[[1]][[tx_seq ]] * sr$sr$undisc$PFS_on + cyclecosts$mru_off[[1]][[tx_seq ]] * sr$sr$undisc$PFS_off + + sum(oneoffcosts[Apply.to == "Init"]$cost) * first_cycle + ruc_postprog <- cyclecosts$mru_on[[1]][[tx_seq ]] * sr$sr$undisc$PPS_on + cyclecosts$mru_off[[1]][[tx_seq ]] * sr$sr$undisc$PPS_off + + EOL_cost <- sum(oneoffcosts[Apply.to == "Death"]$cost) * sr$sr$undisc$ddeath_dt + prog_cost <- (sum(oneoffcosts[Apply.to == "Prog"]$cost) + sum(oneoffcosts[Apply.to == "Init"]$cost))* sr$sr$undisc$prog_dt + + if(ae$approach == "one-off") { + AE_dur <- mean(ae$one_off[Treatment.line==1 & Treatment.name==mol_name, duration_weeks]) + AE_cost <- AE_dur * ae$per_cycle[line==1 & trt==mol_name, cost] * first_cycle + } else { + + AE_cost <- ae$per_cycle[line==1 & trt==mol_name, cost]*(sr$sr$undisc$PFS_on+sr$sr$undisc$PPS_on) + } + + # Discounted costs + + ddrug_cost <- cyclecosts$drug[[1]][[tx_seq ]] * (sr$sr$disc$C$PFS_on + sr$sr$disc$C$PPS_on) + dadmin_cost <- cyclecosts$admin[[1]][[tx_seq ]] * (sr$sr$disc$C$PFS_on + sr$sr$disc$C$PPS_on) + + dsubstrt_admin_cost <- substrt$partsa[Treatment==mol_name & Population == spop, admin_cost] * sr$sr$disc$C$prog_dt + dsubstrt_drug_cost <- substrt$partsa[Treatment==mol_name & Population == spop, drug_cost] * sr$sr$disc$C$prog_dt + dsubstrt_AE_cost <- substrt$partsa[Treatment==mol_name & Population == spop, AE_cost] * sr$sr$disc$C$prog_dt + + druc_preprog <- cyclecosts$mru_on[[1]][[tx_seq ]] * sr$sr$disc$C$PFS_on + cyclecosts$mru_off[[1]][[tx_seq ]] * sr$sr$disc$C$PFS_off + + sum(oneoffcosts[Apply.to == "Init"]$cost) * first_cycle + druc_postprog <- cyclecosts$mru_on[[1]][[tx_seq ]] * sr$sr$disc$C$PPS_on + cyclecosts$mru_off[[1]][[tx_seq ]] * sr$sr$disc$C$PPS_off + + dEOL_cost <- sum(oneoffcosts[Apply.to == "Death"]$cost) * sr$sr$disc$C$ddeath_dt + dprog_cost <- (sum(oneoffcosts[Apply.to == "Prog"]$cost) + sum(oneoffcosts[Apply.to == "Init"]$cost)) * sr$sr$disc$C$prog_dt + + if(ae$approach == "one-off") { + AE_dur <- mean(ae$one_off[Treatment.line==1 & Treatment.name==mol_name, duration_weeks]) + dAE_cost <- AE_dur * ae$per_cycle[line==1 & trt==mol_name, cost] * first_cycle + } else { + + dAE_cost <- ae$per_cycle[line==1 & trt==mol_name, cost]*(sr$sr$disc$C$PFS_on+sr$sr$disc$C$PPS_on) + } + + costs <- list( + undisc = list( + drug = drug_cost, + admin = admin_cost, + AE = AE_cost, + substrt_drug_cost = substrt_drug_cost, + substrt_admin_cost = substrt_admin_cost, + substrt_AE_cost = substrt_AE_cost, + mru_preprog = ruc_preprog, + mru_postprog = ruc_postprog, + EOL_cost = EOL_cost, + prog_cost = prog_cost + ), + disc = list( + drug = ddrug_cost, + admin = dadmin_cost, + AE = dAE_cost, + substrt_drug_cost = dsubstrt_drug_cost, + substrt_admin_cost = dsubstrt_admin_cost, + substrt_AE_cost = dsubstrt_AE_cost, + mru_preprog = druc_preprog, + mru_postprog = druc_postprog, + EOL_cost = dEOL_cost, + prog_cost = dprog_cost + ) + ) + + preprog_hsuv <- as.list(util$hsuv[Population == rpop_n & Treatment.line == 1 & Molecule == mol_num,list(OnTxt,OffTxt)]) + postprog_hsuv <- as.list(util$hsuv[Population == 0 & Treatment.line == 2 & Molecule == mol_num,list(OnTxt,OffTxt)]) # always population 0 at line 2+ + + # Expand this to the time horizon by multiplying column-wise by the gpop line. + # The result is U, which we can element-wise multiply by our consolidated trace + # to get our undiscounted QALYs :) + Upreprog <- util_gpop_mat[,1:length(preprog_hsuv)] %*% diag(preprog_hsuv) + Upostprog <- util_gpop_mat[,1:length(postprog_hsuv)] %*% diag(postprog_hsuv) + + qalys_PFS <- (sr$sr$undisc$PFS_on * Upreprog[,1] + sr$sr$undisc$PFS_off* Upreprog[,2]) * basic$cl_y + qalys_PPS <- (sr$sr$undisc$PPS_on * Upostprog[,1] + sr$sr$undisc$PPS_off* Upostprog[,2]) * basic$cl_y + qalys_PPS_AEs <- substrt$partsa[Treatment==mol_name & Population == spop, AE_QALY_impact] * sr$sr$undisc$prog_dt * util_gpop_mat[spop_n+1] + + dqalys_PFS <- (sr$sr$disc$Q$PFS_on * Upreprog[,1] + sr$sr$disc$Q$PFS_off* Upreprog[,2]) * basic$cl_y + dqalys_PPS <- (sr$sr$disc$Q$PPS_on * Upostprog[,1] + sr$sr$disc$Q$PPS_off* Upostprog[,2]) * basic$cl_y + dqalys_PPS_AEs <- substrt$partsa[Treatment==mol_name & Population == spop, AE_QALY_impact] * sr$sr$disc$Q$prog_dt * util_gpop_mat[spop_n+1] + + + if(ae$approach == "one-off") { + AE_dur <- mean(ae$one_off[Treatment.line==1 & Treatment.name==mol_name, duration_weeks]) + AE_QALYs <- AE_dur * ae$per_cycle[line==1 & trt==mol_name, QALYs] * first_cycle + dAE_QALYs <- AE_QALYs + } else { + + AE_QALYs <- ae$per_cycle[line==1 & trt==mol_name, QALYs]*(sr$sr$undisc$PFS_on+sr$sr$undisc$PPS_on) * util_gpop_mat[spop_n+1] + dAE_QALYs <- ae$per_cycle[line==1 & trt==mol_name, QALYs]*(sr$sr$disc$Q$PFS_on+sr$sr$disc$Q$PPS_on) * util_gpop_mat[spop_n+1] + } + + + + qalys <- list( + undisc = list( + PFS = qalys_PFS, + PPS = qalys_PPS, + AE = AE_QALYs, + AE_PPS = qalys_PPS_AEs + ), + disc = list( + PFS = dqalys_PFS, + PPS = dqalys_PPS, + AE = dAE_QALYs, + AE_PPS = dqalys_PPS_AEs + ) + ) + + ly_PFS_on <- sr$sr$undisc$PFS_on * basic$cl_y + ly_PFS_off <- sr$sr$undisc$PFS_off * basic$cl_y + ly_PPS_on <- sr$sr$undisc$PPS_on * basic$cl_y + ly_PPS_off <- sr$sr$undisc$PPS_off * basic$cl_y + + + lys <- list( + PFS_on = ly_PFS_on, + PFS_off = ly_PFS_off, + PPS_on = ly_PPS_on, + PPS_off = ly_PPS_off + ) + + out <- list( + sr = sr$sr, + sr_plot = sr$sr_plot, + costs = costs, + qalys = qalys, + lys = lys + ) + + return(out) + + }) + names(PartSA_pf) <- p$basic$lookup$ipd$mol[match(sm$trt_1L,p$basic$lookup$ipd$mol$Number),RCC_input_desc] + + + return(PartSA_pf) + }) +} \ No newline at end of file diff --git a/3_Functions/patient_flow/qalys.R b/3_Functions/patient_flow/qalys.R new file mode 100644 index 0000000..c0e832e --- /dev/null +++ b/3_Functions/patient_flow/qalys.R @@ -0,0 +1,4 @@ +# Function is incorporated as code within f_pf_computePF_mk +f_pf_mk_qalys <- function(TRACE, basic, util) { + +} \ No newline at end of file diff --git a/3_Functions/psa/psa functions.R b/3_Functions/psa/psa functions.R new file mode 100644 index 0000000..89dc8fe --- /dev/null +++ b/3_Functions/psa/psa functions.R @@ -0,0 +1,1472 @@ +# ~~ Reference curves ----------------------------------------------------- + +# reference curves require generating sets of parameter estimates using +# the parameters and their variance-covariance matrices. To that end, we define +# several functions: +# +# - Efficiently drawing from multivariate normal using eigenvalue appraoch (per MASS package) +# - Generating one set of reference curves across all PLMTEs +# + +#' Function which takes uniform random draws and produces mutlinorminv results +#' +#' @param rands uniform random draws to be entered into this function. Ensures reproducibility +#' @param mu named vector of values. if no names then the matrix output will have none +#' @param sigma variance-covariance matrix +#' +#' @details credit to `MASS::mvrnorm` which has the code but doesn't let you insert +#' uniform draws. +#' +#' +f_qmultinorminv <- function(rands,mu,sigma) { + p <- length(mu) + n <- length(rands) / p + X <- matrix(qnorm(rands), n) + eS <- eigen(sigma) + ev <- eS$values + out <- t(mu + eS$vectors %*% diag(sqrt(pmax(ev, 0)), p) %*% t(X)) + colnames(out) <- names(mu) + return(out) +} + +#' Function to quickly translate from description to number using the excel +#' lookup tables (provided in the function) +f_psa_trans_desc2id <- function(id_list, lookups) list( + pop = paste0("pop_",lookups$pop[Description == id_list$pop,]$Number), + line = paste0("line_",lookups$line[Description == id_list$line,]$Number), + mol = paste0("mol_",lookups$mol[Description == id_list$mol,]$Number), + trial = paste0("trial_",lookups$trial[Description == id_list$tr,]$Number), + endpoint = paste0("endpoint_",lookups$endpoint[Description == id_list$endpoint,]$Number) +) +f_psa_trans_id2numb <- function(id_list) list( + pop = as.numeric(gsub("pop_","",id_list$pop)), + line = as.numeric(gsub("line_","",id_list$line)), + mol = as.numeric(gsub("mol_","",id_list$mol)), + trial = as.numeric(gsub("trial_","",id_list$trial)), + endpoint = as.numeric(gsub("endpoint_","",id_list$endpoint)) +) + + +#' Function to draw parametric model parameters for all reference curves for all +#' distributions +#' +#' +#' +f_PSA_drawFSParams <- function(surv_regs, n_psa, lookups, return_rands = FALSE, verbose = FALSE) { + lapply(surv_regs, function(risk_pop) { + lapply(risk_pop, function(tr_line) { + lapply(tr_line, function(mol) { + lapply(mol, function(trial) { + lapply(trial, function(endpoint) { + if (is.null(endpoint$fs_fits)) { + return(NULL) + } else { + fits <- .subset2(endpoint,"fs_fits") + # Since we are only going to use one of these distributions, + # we can apply one set of random numbers + max_length <- max(lengths(fits)) + rands <- runif(max_length * n_psa) + plmte_id <- f_psa_trans_desc2id( + id_list = endpoint[c("pop", "line", "mol", "tr", "endpoint")], + lookups = lookups + ) + if(verbose) f_misc_colcat(paste0( + "RNG for PSA - reference curve parameters | ", + paste(unlist(plmte_id), collapse = " | ") + )) + + fs_nam <- names(fits) + names(fs_nam) <- fs_nam + + out <- lapply(fs_nam, function(fs_finam) { + + fs_fit <- .subset2(.subset2(endpoint,"fs_fits"),fs_finam) + + if("logical" %in% class(.subset2(fs_fit,"vcov"))) { + warning(paste0( + "There is no variance-covariance matrix available for ", + paste(unlist(plmte_id), collapse = " | "), + " for ",fs_finam,".", + " I cannot compute probabilsitic parameters and therefore have to use the mean!") + ) + return(matrix(rep(.subset2(fs_fit,"coefs"),n_psa),nrow=n_psa,byrow = TRUE)) + } + f_qmultinorminv( + rands = rands[1:(length(.subset2(fs_fit,"coefs")) * n_psa)], + mu = .subset2(fs_fit,"coefs"), + sigma = .subset2(fs_fit,"vcov") + ) + }) + # If the user wants the uniform draws to replicate, then it can be done. + if (return_rands) { + return(list(draws=out, id=plmte_id, rands = rands)) + } else { + return(list(draws = out, id = plmte_id)) + } + } + }) + }) + }) + }) + }) +} + + + +#' Function to filter down the PSA PSM parameters to the distributions selected +#' in the excel book (and therefore `i`) +f_psa_surv_params_filter <- function(psa_psm_param, excel_eff_table, lookups) { + + # If it's not a data.table, make it one: + if (!"data.table" %in% class(excel_eff_table)) excel_eff_table <- data.table(excel_eff_table) + + # Filter down to curve selections: + curve_select <- excel_eff_table[Include.in.this.analysis. == "Yes" & Effectiveness.data.source == "Trial survival analysis", list( + Population, + Treatment.line, + Molecule, + End.point, + Curve.fit..for.survival.analysis. + )] + + # Now, we can use each id for each set of draws to filter down this table + # to the relevant row. With that row we can select the appropriate + # parametric model and ONLY compute extrapolations for that one, then take + # the colsums or trapz after extrapolating only the relevant model + lapply(psa_psm_param, function(risk_pop) { + lapply(risk_pop, function(tr_line) { + lapply(tr_line, function(mol) { + lapply(mol, function(trial) { + lapply(trial, function(endpoint) { + if (is.null(endpoint)) { + return(NULL) + } else { + + # Use the function we made above to get numbers we can use to filter + # the table: + idn <- f_psa_trans_id2numb(endpoint$id) + tabmatch <- curve_select[Population == idn$pop & Treatment.line == idn$line & Molecule == idn$mol & End.point == idn$endpoint,] + + if (nrow(tabmatch) == 0) return (NULL) + + curve <- lookups$dist[match(tabmatch$Curve.fit..for.survival.analysis., lookups$dist$Description),]$RCC_input_desc + endpoint$draws <- endpoint$draws[[curve]] + endpoint$id$dist <- curve + return(endpoint) + } + }) + }) + }) + }) + }) +} + + + +#' Computes the approximate lambda rate (1/AUC) using either fast or accurate +#' method. Respects the time unit of the CE model in cycles, so conversion comes +#' after. +#' +#' @param psa_params the result of function f_PSA_drawFSParams, which draws from a multivariate normal +#' @param method either sum or trap. sum sums the columns, trap uses the trapezoidal rule +#' +#' @details Cycle down all levels of PLMTE. if there are parameters there, convert +#' those parameters into approximate exponental rates by extrapolating the lines +#' and then calculating AUC using sum or fast method. This is computed in +#' model cycles, meaning the lambda for the exponential is in model cycles as +#' the time unit of the analysis. +#' +f_psa_approx_lambda <- function(psa_params, method = "sum", th, disc=NULL) { + # Checking that the method is one of those that are allowed: + stopifnot(method %in% c("sum", "trap")) + + # Set up the time vector to avoid repeatedly making it thousands of times: + t <- 0:th + + if (is.null(disc)) { + disc <- rep(1,th+1) + } else { + stopifnot(length(disc)== th+1) + } + + # Cycle through the levels of PLMTE, if nothing to do do nothing, otherwise + lapply(psa_params, function(popu) { + lapply(popu, function(li) { + lapply(li, function(mol) { + lapply(mol, function(tr) { + lapply(tr, function(plmte) { + + if (is.null(plmte)) return(NULL) + + # So if there's something in this plmte it will be a list of + # different parametric distributions. each element is a set of + # parameters per row, with each row being one PSA iteration. + + + dr <- .subset2(plmte,"draws") + par_nam <- colnames(dr) + drt <- t(dr) + dis <- plmte$id$dist + npsa <- ncol(drt) + + # Two methods - one for speed one for accuracy: + if (method == "sum") { + vapply( + X = 1:npsa, + FUN = function(cyc) 1 / sum(f_extrapolate(t, drt[, cyc], dis) * disc), + FUN.VALUE = numeric(1) + ) + } else { + # More precise method using trapezoidal integration + vapply( + X = 1:npsa, + FUN = function(cyc) 1 / trapz(t,f_extrapolate(t, drt[, cyc], dis) * disc), + FUN.VALUE = numeric(1) + ) + } + }) + }) + }) + }) + }) +} + + +#' Function to apply `lambda` and `t` to extrapolate an exponential line +f_psa_exp <- function(t,lamda) exp(-lamda*t) + + + +#' function to compute the first TP from a lambda for an exponential and therefore +#' all TPs as they are time invariant. +#' +#' @param PSA_est_Lambdas result of function `f_psa_approx_lambda` +#' +#' @details Lambda can be translated to transition probability TP like so: +#' +#' TP_t = 1-s(t)/s(t-1) +#' Given that the rate is constant with exponential, all TP for a given endpoint +#' are equal to TP_t, that is TP = 1-s(t)/s(t-1). +#' +#' At cycle 1 (given cycle 0 is model start), TP_t = 1-(s(t) / s(t-1)), but s(t-1) +#' is known to be 1. Therefore TP = 1-s(1). +#' +#' Thus, the entire set of lambdas can be converted to TPs by cycling through them +#' and applying 1-f_psa_exp(1,lambda) +#' +f_psa_lambda2TP <- function(PSA_est_Lambdas) { + lapply(PSA_est_Lambdas, function(popu) { + lapply(popu, function(li) { + lapply(li, function(mol) { + lapply(mol, function(tr) { + lapply(tr, function(plmte) { + if(is.null(plmte)) { + return(NULL) + } else { + 1-f_psa_exp(1,plmte) + } + }) + }) + }) + }) + }) +} +f_psa_lambda2St <- function(PSA_est_Lambdas,t) { + TH <- length(t) + lapply(PSA_est_Lambdas, function(popu) { + lapply(popu, function(li) { + lapply(li, function(mol) { + lapply(mol, function(tr) { + lapply(tr, function(plmte) { + if(is.null(plmte)) { + return(NULL) + } else { + vapply(X = 1:length(plmte), FUN.VALUE = numeric(TH), FUN = function(psa_it) f_psa_exp(t,plmte[psa_it])) + } + }) + }) + }) + }) + }) +} + + + + +#' Function taking extrapolated survival in different treatment lines and +#' either computing TP out of that state or lambda for exponential approximation. +f_psa_collapse_st_lambda2lplus <- function(st, th, disc, dfacQ = NULL) { + + # If the user wants discounted lambda rates they have to provide the discount factor! + if (disc) stopifnot(!is.null(dfacQ)) + + # Drill down from the top level to the line level. if 1L compute TP otherwise + # compute lambda: + lapply(st, function(popu) { + + na_li <- names(popu) + names(na_li) <- na_li + + lapply(na_li, function(li_lab) { + + li <- popu[[li_lab]] + + # li <- st$pop_0$line_1 + + # Now, if we are in first-line, we do something differently as we + # simply calculate TP + # + # If we are in 2L+ we compute lambda (1/int(s(t))) + if (li_lab == "line_1") { + # This is 1st line, so keep the shape and compute the TP for it + lapply(li, function(mol) { + lapply(mol, function(tr) { + lapply(tr, function(endp) { + # If there's no s(t) to manipulate, do nothing + if (!"st" %in% names(endp)) { + return(endp) + } else { + endp$tp <- 1 - (endp$st / shift(endp$st,fill = 1)) + endp$st <- NULL + return(endp) + } + }) + }) + }) + } else { + # this is the 2L+, so compute lambda + lapply(li, function(mol) { + lapply(mol, function(tr) { + lapply(tr, function(endp) { + # cat(paste0(paste(endp$dest,collapse = "$"),"\n")) + # If there's no s(t) to manipulate, do nothing + if(!"include" %in% names(endp)) { + return(endp) + } else if (endp$include == FALSE) { + return(endp) + } else if (!"st" %in% names(endp)) { + if(length(endp$fp$HR)==th+1) endp$fp$HR <- NULL + return(endp) + } else if (all(length(endp$st)==1,is.na(endp$st[1]))) { + if(length(endp$fp$HR)==th+1) endp$fp$HR <- NULL + return(endp) + } else { + # If discounted lambda, use the discount factor, if not, don't! + if (disc) { + endp$lambda <- 1 / trapz(0:th,endp$st * dfacQ) + } else { + endp$lambda <- 1 / trapz(0:th,endp$st) + } + if(length(endp$fp$HR)==th+1) endp$fp$HR <- NULL + endp$st <- NULL + return(endp) + } + }) + }) + }) + } + }) + }) +} + + + + +#' Function to generate all PSA iterations for all hazard ratios in one go within +#' the PLMTE structure. +#' +#' @details cycles down the excel table and for each row makes npsa normal draws +#' for the hazard ratio to apply in that position. These HRs are then entered +#' into the efficacy networks during the assumption step. +#' +f_psa_assumptionsTab_genHRs <- function(excel_efficacy_table,npsa) { + excel_efficacy_table$r <- 1:nrow(excel_efficacy_table) + tab <- data.table(excel_efficacy_table)[Effectiveness.data.source == "Apply HR to",] + + # Cycle down the table one row at a time generating HRs for all PSA iterations + # for all of those which are informed by those HRs. This assumes that all HRs + # are independent from each other + # + lapply(1:nrow(tab), function(HR_row) { + list2env(as.list(tab[HR_row,]),envir = environment()) + list( + dest = list( + pop = paste0("pop_", Population), + line = paste0("line_", Treatment.line), + mol = paste0("mol_", Molecule), + trial = paste0("trial_", Origin.trial), + endpoint = paste0("endpoint_", End.point) + ), + orig = list( + pop = paste0("pop_", Origin.population), + line = paste0("line_", Origin.line), + mol = paste0("mol_", Origin.treatment), + trial = paste0("trial_", Origin.trial), + endpoint = paste0("endpoint_", Origin.endpoint) + ), + # hr = rnorm(npsa,HR.to.apply,(HR.95..CI..UCL. - HR.to.apply)/1.96) + hr = rlnorm(npsa, log(HR.to.apply), estSDlog(HR.to.apply, HR.95..CI..LCL., HR.95..CI..UCL.)) + + ) + + }) +} + + +f_misc_plmte2StringID <- function(plmte_id_list) paste(unlist(plmte_id_list),collapse = "$") + + +#' function to go into demographics and pull out just the PSA iteration +#' requested +f_psa_get_it_demo <- function(demo,it) { + lapply(demo, function(popu) { + lapply(popu, function(li) { + lapply(li, function(cata) { + if (class(cata) == "numeric") { + return(cata[it]) + } else { + cata + } + }) + }) + }) +} + + + + +#' Function for the lambda approximation which provides vector of costs for 1L +#' and per-cycle costs for 2L+ +#' +#' @details This function should be used to collapse the cost object for ONE +#' PSA RUN. Each PSA version of the cost object p$costs$mk should contain the +#' exact same structure as the deterministic model. This function collapses +#' the data to just what's needed for one PSA iteration. +#' +f_psa_lambda_cost <- function(cost) { + lapply(cost, function(category) { + li_lab <- names(category) + names(li_lab) <- li_lab + lapply(li_lab, function(li_nam) { + li <- category[[li_nam]] + + # if first-line we need the full vector, if not we need per cycle: + if (li_nam == "line_1") { + return(li) + } else { + return(lapply(li,mean)) + } + }) + }) +} + + +#' Lambda approximation version of TP calculator +f_psa_pf_mk_ComputeTPs <- function(tp_list) { + out <- lapply(1:length(tp_list), function(trt_li) { + + # Figure out the time horizon: + tl <- tp_list[[trt_li]] + + # In this version of the function, if treatment line is 1, then we do what + # we did in the deterministic model, otherwise the rates are all fixed + # from lambda + if (trt_li < length(tp_list)) { + + if (trt_li == 1) { + # Pull out the endpoints for ease of reading. These are time-varying + tp_TTD <- tl$TTD$tp + tp_TTP <- tl$TTP$tp + tp_PFS <- tl$PFS$tp + } else { + # lambda-based - TP=1-(s(t) / s(t-1)); s(t-1) = 1, so 1-s(t) + lambda_list <- tl$OS$lambda + tp_TTD <- 1-f_psa_exp(1,tl$TTD$lambda) + tp_TTP <- 1-f_psa_exp(1,tl$TTP$lambda) + tp_PFS <- 1-f_psa_exp(1,tl$PFS$lambda) + } + + # Apply assumptions to figure out TP from each state to each other state: + disc <- tp_TTD + + # ON-treatment transition probabilities + + # probability of death directly from on treatment state: + death_on <- 1 - tp_TTP - (1-tp_PFS) + + # Going directly onto next line from the on treatment state + + # The probability of discontinuation removing the probability of any other event + disc_only <- disc - tp_TTP - death_on + disc_only_adj <- pmax(disc_only, 0) + + # The probability of going to next therapy but not death directly from on-treatment + next_on <- tp_TTP - (disc_only_adj - disc_only) + + # the probability of staying on treatment is simply the inverse of the probability + # of going off treatment for any reason + stay_on <- 1 - disc + + # The probability of discontinuation when on treatment + disc_on <- disc_only_adj + + # Just to note some equivalence here: + # 1 - next_on - disc - stay_on == 1 - tp_TTP - (1-tp_PFS) + + next_off <- next_on + death_off <- death_on + stay_off <- 1 - death_on - next_on + + } else { + # Lambda approximate rates in the last line + tp_OS <- 1-f_psa_exp(1,tl$OS$lambda) + + stay_on <- 1-tp_OS + disc_on <- 0 + next_on <- 0 + death_on <- tp_OS + + next_off <- 0 + death_off <- 0 + stay_off <- 0 + } + + + + # return a list of TPs for each of the 7 required to compute the Markov model + return(list( + disc_on = disc_on, + stay_on = stay_on, + next_on = next_on, + death_on = death_on, + stay_off = stay_off, + next_off = next_off, + death_off = death_off + )) + + }) + names(out) <- c(paste0("L",1:(length(tp_list)-1)),"NT") + return(out) +} + + + +f_psa_lambda_M_compile <- function(TPM, max_active, TPs) { + + TH <- length(TPs$L1$disc_on) + nline <- length(TPs) + + # Strip down TPM to the size required + included_index <- c(1:((nline-1) * 2), ((max_active+1)*2)-1,(max_active+1)*2) + ATL <- nline-1 + + # First, we strip down TPM to suit the number of ATLs we have + m <- TPM[included_index,included_index] + dm <- dim(m)[[1]] + + # Now that we have the empty version of the matrix to fill in, let's populate + # it with some transitions. + # + # We leave first-line transitions blank because they get populated at the end + # of the function (when we expand it out to per-cycle). 2L+ are all time-invariant + + # Any line can be NT, so we use the names within TP to capture that, and create + # a matrix of co-ordinates and values to put in those co-ordinates, then put + # them there in one command + coord <- do.call( + rbind, + lapply(2:nline, function(tx_line) { + # top left element of this set is linex2 - 1 + tl <- (tx_line * 2) - 1 + tr <- TPs[[tx_line]] + if (tx_line == nline) { + # This line is BSC. There is ONLY the probability of death or stay + # and there is no "next". In this situation only top left and top left+1 + # are the co-ordinates: + matrix( + c( + tl , tl , .subset2(tr,"stay_on"), + tl , tl+1, .subset2(tr,"death_on"), + tl+1, tl+1, 1 + ), + ncol=3, + byrow = TRUE + ) + } else { + # this is not the last line of therapy, so we need the full suite: + matrix( + c( + tl , tl , .subset2(tr,"stay_on"), + tl , tl+1, .subset2(tr,"disc_on"), + tl , tl+2, .subset2(tr,"next_on"), + tl , dm , .subset2(tr,"death_on"), + tl+1, tl+1, .subset2(tr,"stay_off"), + tl+1, tl+2, .subset2(tr,"next_off"), + tl+1, dm , .subset2(tr,"death_off") + ), + ncol=3, + byrow = TRUE + ) + } + + }) + ) + m[coord[,1:2]] <- coord[,3] + + L1_trans <- .subset2(TPs,"L1") + list2env(L1_trans, envir = environment()) + # m is now the time-invariant part of the matrix which can be replicated TH + # times: + return(lapply(1:TH, function(cyc) { + + # Like with the time invariant, but with the time-varying bit, make a + # co-ordinate matrix: + coord <- matrix( + c( + 1, 1 , .subset2(stay_on,cyc), + 1, 2 , .subset2(disc_on,cyc), + 1, 3 , .subset2(next_on,cyc), + 1, dm, .subset2(death_on,cyc), + 2, 2 , .subset2(stay_off,cyc), + 2, 3 , .subset2(next_off,cyc), + 2, dm, .subset2(death_off,cyc) + ), + ncol=3, + byrow = TRUE + ) + m[coord[,1:2]] <- coord[,3] + return(m) + })) +} + + + + +#' Patient flow calculator for probabilistic sensitivity analysis using lambda approximation +#' to remove the need for tunnel states +#' +#' @details this function has less comments in it, see f_pf_computePF_mk for +#' fully explained. Where this function differs from f_pf_computePF_mk there are +#' comments in here. +#' +f_psa_pf_computePF_mkLambda <- function(pops, + basic, + demo, + sequences, + survival, + costs, + util, + ae, + eff_table, + verbose, + include_plots, + just_nlines, + just_seq) { + + # Some basic inputs for easy reference and avoiding repitition in the code: + dfac_q <- .subset2(basic,"discFacQ") + dfac_c <- .subset2(basic,"discFacC") + + # abbreviate cost per cycle (cpc) and costs on initation (coi). we only need the + # mols which we are using for this sequence. one off are for all. + cpc <- .subset2(costs,"per_cycle") + coo <- .subset2(costs,"one_off") + coi <- .subset2(coo,"Prog") + cod <- .subset2(coo,"Death") + + # Getting more specific: + cpc_d <-.subset2(cpc,"drug") + cpc_a <-.subset2(cpc,"admin") + cpc_m_on <-.subset2(cpc,"mru_on") + cpc_m_off <-.subset2(cpc,"mru_off") + + + # Now simulate the treatment sequences: + max_active <- max(sapply(sequences$qc, ncol)) - 1 + lookup <- basic$lookup + pop_map <- lookup$pop_map + th <- basic$th+1 + + # Full size empty TPM, then expand out to list of TH length + TPM <- matrix( + 0, + nrow = ((max_active+1)*2), + ncol = ((max_active+1)*2), + dimnames = lapply(1:2, function(x) { + c( + unlist(lapply(1:max_active, function(trt_line) { + paste0("L",trt_line,c("_on", "_off")) + })), + "BSC", "dead" + ) + }) + ) + + # population level inputs, these change at each population: + lapply(pops, function(popu) { + # population ids + popu_n <- as.integer(gsub("pop_","",popu)) + overall_pop <- as.list(pop_map[Overall.population.number == popu_n,]) + rpop_n <- overall_pop$Risk.population.number + rpop <- paste0("pop_",rpop_n) + spop_n <- overall_pop$Sequencing.population.number + spop <- paste0("pop_",spop_n) + + # demographics + demog <- demo[[rpop]] + + # Sequence list derivation + L1_inc_rpop <- sort(unique(eff_table[Treatment.line == 1 & + Population == rpop_n & + Include.in.this.analysis. == "Yes", ]$Molecule)) + L2p_inc_rpop <- lapply(2:max_active, function(line_n) { + sort(unique(eff_table[Treatment.line == line_n & + Population == 0 & Include.in.this.analysis. == "Yes", ]$Molecule)) + }) + trt_filter <- c(list(L1_inc_rpop),L2p_inc_rpop) + rm(L1_inc_rpop) + rm(L2p_inc_rpop) + ateachline <- sequences$n[[spop]] + ateachline$nlines <- ncol(ateachline)-rowSums(is.na(ateachline)) + molecule_list_full <- lapply(2:(ncol(ateachline)-1), function(n_lines) { + ateachline[nlines==n_lines,1:n_lines] + }) + sequence_list <- lapply(molecule_list_full, function(lines_amount) { + Reduce( + x = 1:(ncol(lines_amount)-1), + init = lines_amount, + accumulate = FALSE, + f = function(prev, treatment_line) { + + # Find out which molecules are in that line column and filter the table + molecules_allowed <- trt_filter[[treatment_line]] + which_to_allow <- which(prev[[treatment_line]] %in% molecules_allowed) + return(prev[which_to_allow,]) + } + ) + }) + + # account for running subset + if (!is.null(just_nlines)) { + sequence_list <- sequence_list[just_nlines] + names(sequence_list) <- paste0("active_lines_",just_nlines) + } else { + names(sequence_list) <- paste0("active_lines_",1:length(sequence_list)) + } + + # gpop matrix to multiply HRQoL by over time + util_gpop_mat <- matrix( + util$gpop[[rpop]], + nrow = basic$th+1, + ncol = (max_active*2) +1 + 1 + ) + + # Sequence "block" populations - treatments with specific amounts of ATLs: + return(lapply(sequence_list, function(amount_of_lines) { + + seq_id <- 1:nrow(amount_of_lines) + names(seq_id) <- paste0("seq_",seq_id) + if (!is.null(just_seq)) {seq_id <- seq_id[just_seq]} + + + # Treatment sequence level: specific population, ATLs, cycling down: + lapply(seq_id, function(seq_n) { + + number_of_lines <- ncol(amount_of_lines) + + # Pull out this specific treatment sequence: + trt_seq <- structure(paste0("mol_",amount_of_lines[seq_n,]),.Names=names(amount_of_lines[seq_n,])) + trts_n <- as.integer(gsub("mol_","",trt_seq)) + + trt_seq_plotLab <- trt_seq + names(trt_seq_plotLab) <- gsub("line_","L",names(trt_seq)) + names(trt_seq_plotLab)[length(names(trt_seq_plotLab))] <- "NT" + + treatment_names <- basic$lookup$ipd$mol[match(trts_n,basic$lookup$ipd$mol$Number),Description] + treatment_abbr <- basic$lookup$ipd$mol[match(trts_n,basic$lookup$ipd$mol$Number),RCC_input_desc] + + if (verbose) cat(paste0( + "PSA #", util$hsuv$iteration[1], + " | ", popu, + " | ", paste(treatment_names,collapse = "->"),"\n" + )) + + + # In this case we provide survival$tp instead of survival$st. This returns + # the PLMTE format extraps, but instead of st for each of them there is + # tp at first-line and lambda at 2L+. + extraps <- f_seq_extrapCollector( + treatment_sequence = trt_seq, + st = survival$tp, + lookups = lookup, + pop_n = rpop_n, + pop_0_2Lp = TRUE + ) + + # For the lambda approximation, the TP calculations are the same + # methodologically but the way to GET those TPs is different + TPs <- f_psa_pf_mk_ComputeTPs(tp_list = extraps$st) + + # Correct for non-finite TPs: + TPs <- lapply(TPs, function(li) { + lapply(li, function(tp) { + tp[!is.finite(tp)] <- 0 + tp + }) + }) + + # For computing L1 off trt entrants + L1_onoff_adjustment <- TPs$L1$death_on + TPs$L1$next_on + + # Compile TH Ms + M <- f_psa_lambda_M_compile( + TPM = TPM, + max_active = max_active, + TPs = TPs + ) + + # Set up the baseline population (all on 1L on trt) + p_0 <- numeric(dim(M[[1]])[[1]]) + p_0[1] <- 1 + + # Undiscounted trace: + TRACE <- matrix( + data = unlist( + Reduce( + x = 1:p$basic$th, + init = p_0, + accumulate = TRUE, + f = function(prev_pop, cyc) { + prev_pop %*% M[[cyc]] + } + ), + use.names = FALSE + ), + byrow = TRUE, + ncol = dim(M[[1]])[[1]], + dimnames = list(NULL,dimnames(M[[1]])[[1]]) + ) + + tTRACE <- t(TRACE) + + M_nam <- dimnames(M[[1]])[[1]] + + # compute the % of the initial cohort transitioning into each state + # at each cycle beyond the 1st state. This is used to assign one-off + # costs/QALY losses. + ENTRANTS <- matrix( + unlist(lapply(1:ncol(tTRACE), function(cyc) { + dm <- dim(M[[cyc]])[1] + coord <- rbind( + cbind(1:(dm-1),2:dm), + cbind(seq(1,dm-2,2),seq(1,dm-2,2)+2) + ) + coord <- coord[order(coord[,1]),] + coord <- cbind(coord,M[[cyc]][coord]) + up_one_trans <- unlist(lapply(unique(coord[,2]), function(x) sum(coord[coord[,2]==x,3]))) + p_tm1 <- tTRACE[1:(nrow(tTRACE)-1),cyc] + return(as.numeric(p_tm1 * up_one_trans)) + }),use.names = FALSE), + nrow = ncol(tTRACE), + byrow = TRUE, + dimnames = list(NULL,M_nam[2:length(M_nam)]) + ) + + # Discounted versions: + DTRACEQ <- TRACE * dfac_q + DTRACEC <- TRACE * dfac_c + tDTRACEQ <- t(DTRACEQ) + tDTRACEC <- t(DTRACEC) + DENTRANTSQ <- ENTRANTS * dfac_q + DENTRANTSC <- ENTRANTS * dfac_c + + # drop some values to save memory + rm(extraps, M, TPs, tTRACE, M_nam) + + # Generate something similar to the consolidated_trace object + # we had for the deterministic model: + CT <- list( + OS = rowSums(TRACE[,1:(length(p_0)-1)]), + full_lines = TRACE, + entrants = ENTRANTS + ) + CT_disc <- list( + OS_q = rowSums(DTRACEQ[,1:(length(p_0)-1)]), + OS_c = rowSums(DTRACEC[,1:(length(p_0)-1)]), + full_lines_q = DTRACEQ, + full_lines_c = DTRACEC, + entrants_q = DENTRANTSQ, + entrants_c = DENTRANTSC + ) + + # We already made the cost elements once so now we use them + + # cycle through the treatment sequence, computing the different cost components: + + pf_costs <- lapply(1:length(trt_seq), function(line_n) { + mol <- trt_seq[line_n] + + if (line_n == 1) { + # first-line - vector multiplied by cost + nam_mol <- names(mol) + + tr_1l_on <- TRACE[,1] + tr_1l_off <- TRACE[,2] + + drug_cost <- cpc_d[[nam_mol]][[mol]] * tr_1l_on + admin_cost <- cpc_a[[nam_mol]][[mol]] * tr_1l_on + ruc_on <- cpc_m_on[[nam_mol]][[mol]] * tr_1l_on + ruc_off <- cpc_m_off[[nam_mol]][[mol]] * tr_1l_off + + # Discounted version: + dtr_1l_on <- TRACE[,1] * dfac_c + dtr_1l_off <- TRACE[,2] * dfac_c + + ddrug_cost <- cpc_d[[nam_mol]][[mol]] * dtr_1l_on + dadmin_cost <- cpc_a[[nam_mol]][[mol]] * dtr_1l_on + druc_on <- cpc_m_on[[nam_mol]][[mol]] * dtr_1l_on + druc_off <- cpc_m_off[[nam_mol]][[mol]] * dtr_1l_off + + rm(tr_1l_on,tr_1l_off,dtr_1l_on, dtr_1l_off) + + } else if (line_n < length(trt_seq)) { + + # In the lambda approximated world, the costs are time invariant in 2L+ + nam_mol <- names(mol) + tr_on <- TRACE[,(line_n*2)-1] + tr_off <- TRACE[,line_n*2] + e_on <- ENTRANTS[,(line_n*2)-2] + + dc <- cpc_d[[nam_mol]][[mol]] + ac <- cpc_a[[nam_mol]][[mol]] + ru_on <- cpc_m_on[[nam_mol]][[mol]] + ru_off <- cpc_m_off[[nam_mol]][[mol]] + + drug_cost <- tr_on * dc + admin_cost <- tr_on * ac + ruc_on <- (tr_on * ru_on) + (e_on * coi) + ruc_off <- tr_off * ru_off + + dtr_on <- DTRACEC[,(line_n*2)-1] + dtr_off <- DTRACEC[,line_n*2] + de_on <- DENTRANTSC[,(line_n*2)-2] + + ddrug_cost <- dtr_on * dc + dadmin_cost <- dtr_on * ac + druc_on <- (dtr_on * ru_on) + (de_on * coi) + druc_off <- dtr_off * ru_off + + rm(tr_on, tr_off,e_on, dtr_on,dc,ac,ru_on,ru_off) + + } else { + + # Future switch? Add in one-off costs upon entering this treatment line: + # In future, link this to the excel switch for applying this cost + # upon starting BSC. + if (TRUE) { + # Sometimes BSC is coming after any active treatment line. + # The cost should come from the last active treatment line's BSC + # values: + if (is.null(cpc$mru_on[[names(mol)]])) { + # reduce line id by 1 just for pulling out the right data + # from cpc: + namline_n <- as.numeric(gsub("line_","",names(mol))) + line_nam_temp <- paste0("line_",namline_n-1) + + # Now use this adjusted name to pull out the right values + mru_on_tx <- cpc$mru_on[[line_nam_temp]][[mol]] + } else { + mru_on_tx <- cpc$mru_on[[names(mol)]][[mol]] + } + } + + ruc_on <- (mru_on_tx * TRACE[,"BSC"]) + (ENTRANTS[,"BSC"] * coi) + druc_on <- (mru_on_tx * DTRACEC[,"BSC"]) + (DENTRANTSC[,"BSC"] * coi) + + drug_cost <- rep(0,th) + admin_cost <- rep(0,th) + ddrug_cost <- rep(0,th) + dadmin_cost <- rep(0,th) + ruc_off <- rep(0,th) + druc_off <- ruc_off + + } + + # Ok now no matter how many lines and their order we've done drug cost + # with full memory for our treatment sequence -_- + + return(list( + molecule = mol, + undisc = list( + drug = drug_cost, + admin = admin_cost, + mru_on = ruc_on, + mru_off = ruc_off + ), + disc = list( + drug = ddrug_cost, + admin = dadmin_cost, + mru_on = druc_on, + mru_off = druc_off + ) + )) + }) + names(pf_costs) <- names(trt_seq) + + # We now no longer need TRACE (or discounted trace) and can drop it to free up (a lot of) memory + rm(TRACE, DTRACEC, DTRACEQ) + + # Finally, add in end of life costs by multiplying the entrants to death from consolidated + # trace by the cost upon death + pf_costs$eol <- list( + undisc = CT$entrants[,ncol(CT$entrants)] * cod, + disc = CT_disc$entrants_c[,ncol(CT_disc$entrants_c)] * cod + ) + + # QALYs and AEs are calculated using the consolidated traces: pf_qalys + + # Get the HSUVs for the active treatment line by treatment status: + hsuv_active <- unlist(lapply(1:(number_of_lines-1), function(line_n) { + + # ASSUMEs POPULATION 0 IF LATER LINES!!!! + if (line_n == 1) { + line_hsuv <- as.list(util$hsuv[Population == rpop_n & Treatment.line == line_n & Molecule == trts_n[line_n],list(OnTxt,OffTxt)]) + } else { + line_hsuv <- as.list(util$hsuv[Population == 0 & Treatment.line == line_n & Molecule == trts_n[line_n],list(OnTxt,OffTxt)]) + } + + lab_line <- paste0("L",line_n) + names(line_hsuv) <- paste0(lab_line,c("_on","_off")) + unlist(line_hsuv) + })) + # Get the HSUV for BSC: + hsuv_bsc <- structure( + util$hsuv[Population == 0 & Treatment.line == number_of_lines & Molecule == trts_n[number_of_lines],]$OnTxt, + .Names = paste0("L",number_of_lines,"_on") + ) + + # Stick them together to form the vector of HSUVs per possible state for + # this treatment sequence: + hsuv_unadj <- c(hsuv_active,hsuv_bsc,"dead"=0) + + # Expand this to the time horizon by multiplying column-wise by the gpop line. + # The result is U, which we can element-wise multiply by our consolidated trace + # to get our undiscounted QALYs :) + U <- util_gpop_mat[,1:length(hsuv_unadj)] %*% diag(hsuv_unadj) + + rm(hsuv_active) + rm(hsuv_bsc) + rm(hsuv_unadj) + + # Undiscounted QALYs (note AEs are separate as they're a separate thing): + pf_qalys <- list( + undisc = (CT$full_lines * .subset2(basic,"cl_y")) * U, + disc = (CT_disc$full_lines_q * .subset2(basic,"cl_y")) * U + ) + + rm(U) + + # AEs undiscounted + + # pf_ae + # Same as above, compute your AE stuff here + + if(ae$approach[1] == "one-off") { + + ae_impact_atl <- do.call( + rbind, + lapply(1:(number_of_lines-1), function(line_n) { + c(cost = ae$per_cycle[ae$per_cycle$trt == treatment_abbr[line_n] & ae$per_cycle$line == line_n]$cost, + qaly = ae$per_cycle[ae$per_cycle$trt == treatment_abbr[line_n] & ae$per_cycle$line == line_n]$QALYs, + dur = mean( + ae$one_off$duration_weeks[ + ae$one_off$Molecule == trts_n[line_n] + & ae$one_off$Treatment.line == min(line_n,2) + ])) + }) + ) + + ae_impact_atl <- data.table(ae_impact_atl) + ae_oo_cost <- ae_impact_atl$cost * ae_impact_atl$dur + ae_oo_qaly <- ae_impact_atl$qaly * ae_impact_atl$dur + + ent <- data.table(as.matrix(CT$entrants)) + ent <- rbindlist(list(data.frame(t(structure(rep(0,ncol(ent)),.Names=colnames(ent)))),ent)) + ent <- cbind(L1_on = c(1,rep(0,nrow(ent)-1)),ent) + + dent_c <- data.table(as.matrix(CT_disc$entrants_c)) + dent_c <- rbindlist(list(data.frame(t(structure(rep(0,ncol(dent_c)),.Names=colnames(dent_c)))),dent_c)) + dent_c <- cbind(L1_on = c(1,rep(0,nrow(dent_c)-1)),dent_c) + + dent_q <- data.table(as.matrix(CT_disc$entrants_q)) + dent_q <- rbindlist(list(data.frame(t(structure(rep(0,ncol(dent_q)),.Names=colnames(dent_q)))),dent_q)) + dent_q <- cbind(L1_on = c(1,rep(0,nrow(dent_q)-1)),dent_q) + + nonzero_ind <- 1:number_of_lines + 0:(number_of_lines-1) + + nstate <- ncol(CT$full_lines) + n_cyc <- nrow(CT$full_lines) + nam_st <- colnames(CT$full_lines) + + # clever trick to expand a vector + ae_oo_cost <- "[<-"(numeric(nstate), nonzero_ind, c(ae_oo_cost,0)) + ae_oo_qaly <- "[<-"(numeric(nstate), nonzero_ind, c(ae_oo_qaly,0)) + + # multiply payoff by entrants + pf_ae <- list( + undisc = list( + costs = matrix( + as.matrix(ent)[1:th,] %*% diag(ae_oo_cost), + ncol = nstate, + nrow = n_cyc, + dimnames = list(NULL,nam_st) + ), + qalys = matrix( + as.matrix(ent)[1:th,] %*% diag(ae_oo_qaly), + ncol = nstate, + nrow = n_cyc, + dimnames = list(NULL,nam_st) + ) + ), + disc = list( + costs = matrix( + as.matrix(dent_c)[1:th,] %*% diag(ae_oo_cost), + ncol = nstate, + nrow = n_cyc, + dimnames = list(NULL,nam_st) + ), + qalys = matrix( + as.matrix(dent_q)[1:th,] %*% diag(ae_oo_qaly), + ncol = nstate, + nrow = n_cyc, + dimnames = list(NULL,nam_st) + ) + ) + ) + + + } else { + # compute cost per cycle on treatment and qalys lost per cycle on treatment: + ae_cpc_ontrt <- unlist(lapply(1:number_of_lines, function(act_line_n) { + as.list(ae$per_cycle[line == act_line_n & molecule == trts_n[act_line_n],])$cost + })) + ae_qlpc_ontrt <- unlist(lapply(1:number_of_lines, function(act_line_n) { + as.list(ae$per_cycle[line == act_line_n & molecule == trts_n[act_line_n],])$QALYs + })) + # Make a correction for if we have 4 atl's: + if (number_of_lines == max_active+1) { + ae_cpc_ontrt[5] <- ae$per_cycle[line == max_active & molecule == trts_n[number_of_lines],]$cost + ae_qlpc_ontrt[5] <- ae$per_cycle[line == max_active & molecule == trts_n[number_of_lines],]$QALYs + } + nonzero_ind <- 1:number_of_lines + 0:(number_of_lines-1) + + # pad the cpc and qlpc with 0s: + ae_cpc <- "[<-"(numeric(ncol(CT$full_lines)), nonzero_ind, ae_cpc_ontrt) + ae_qlpc <- "[<-"(numeric(ncol(CT$full_lines)), nonzero_ind, ae_qlpc_ontrt) + + names(ae_cpc) <- colnames(CT$full_lines) + names(ae_qlpc) <- colnames(CT$full_lines) + + # Implement adverse events (yay) + pf_ae <- list( + undisc = list( + costs = matrix( + CT$full_lines %*% diag(ae_cpc), + ncol = ncol(CT$full_lines), + nrow = nrow(CT$full_lines), + dimnames = list(NULL,colnames(CT$full_lines)) + ), + qalys = matrix( + CT$full_lines %*% diag(ae_qlpc), + ncol = ncol(CT$full_lines), + nrow = nrow(CT$full_lines), + dimnames = list(NULL,colnames(CT$full_lines)) + ) + ), + disc = list( + costs = matrix( + (CT$full_lines * dfac_c) %*% diag(ae_cpc), + ncol = ncol(CT$full_lines), + nrow = nrow(CT$full_lines), + dimnames = list(NULL,colnames(CT$full_lines)) + ), + qalys = matrix( + CT_disc$full_lines %*% diag(ae_qlpc), + ncol = ncol(CT$full_lines), + nrow = nrow(CT$full_lines), + dimnames = list(NULL,colnames(CT$full_lines)) + ) + ) + ) + } + + if(include_plots) { + f_plot_mk_draw_consol_trace(consol_trace = CT$full_lines, + treatment_names = treatment_names, + tmax = 15) + } + + + return(list( + population = popu, + trt_nam = treatment_names, + trace_consol = CT$full_lines, + costs = pf_costs, + qalys = pf_qalys, + aes = pf_ae + )) + }) + })) + }) +} + + + + + +# Post run analysis ------------------------------------------------------- + + + + +#' Function to generate probabilistic draws for the subsequent treatment proportions +#' +#' @param subsTx_table The named range `R_table_sub_txts_prop_n_costs` from excel +#' @param sims default 10000 - the amount of samples to draw. +#' @param lookups `p$basic$lookup` containing the lookup tables to make the columns to match model results +#' +sub_tx_PSA_samples <- function(subsTx_table, sims = 10000, lookups, PSA = TRUE) { + + # strip the table down to what gets used in the CE model to reduce unecessary + # stuff + subsTx_table <- data.table(subsTx_table) + subsTx_table <- subsTx_table[!is.na(Population),list(Population,Line.1,Line.2,Line.3,Line.4,Adj.proportion.given.line.1,n)] + + # Make the columns that match with the model results: + + lu_mol <- lookups$ipd$mol + + # Weighting table: + # The subsTx table only needs to be computed once, so just get it done: + subsTx_table$L1 <- lu_mol[match(subsTx_table$Line.1,RCC_input_desc,nomatch = NA),]$Number + subsTx_table$L2 <- lu_mol[match(subsTx_table$Line.2,RCC_input_desc,nomatch = NA),]$Number + subsTx_table$L3 <- lu_mol[match(subsTx_table$Line.3,RCC_input_desc,nomatch = NA),]$Number + subsTx_table$L4 <- lu_mol[match(subsTx_table$Line.4,RCC_input_desc,nomatch = NA),]$Number + subsTx_table$L5 <- 999 + subs <- + subsTx_table[!is.na(Population), list( + Population, + L1, + L2, + L3, + L4, + L5, + Adj.proportion.given.line.1, + n + )] + subs$trt_n <- do.call(paste, c(subs[,paste0("L",1:5),with=FALSE], sep="→")) + subs$trt_n <- gsub("→NA","",subs$trt_n) + subs <- subs[,list(Population,L1,trt_n,Adj.proportion.given.line.1,n)] + + # split the table by population and first-line therapy to give us something + # to lapply through: + split_table <- split(subs, by=c("Population","L1")) + + # Each element in the lapply is then one pairing of population and first-line + # therapy. We can then generate our dirichlet draws for it as we know + # that the column Adj.proportion.given.line.1 sums to 1. Therefore we can + # generate diriclet draws from it by multiplying it by N + rbindlist(lapply(split_table, function(popL1) { + + # sum(popL1$Adj.proportion.given.line.1) will always be 1. Therefore, draw + # the n column then back calculate the proportion + + if (PSA == TRUE) { + draws <- gtools::rdirichlet(sims,alpha = popL1$n) + } else { + draws <- popL1$Adj.proportion.given.line.1 + } + + # QC proof: the below is true: + # c(t(draws))[1:ncol(draws)] == draws[1,] + + id <- as.list(popL1[,list(Population,L1,trt_n)]) + + # the PSA results use oo_pop as a numeric + + id$oo_pop <- as.numeric(gsub("pop","",id$Population)) + id$Population <- NULL + + id <- as.data.table(lapply(id, function(col) rep(col,sims))) + id$iteration <- rep(1:sims, each=nrow(popL1)) + id$Adj.proportion.given.line.1 <- c(t(draws)) + + # QC test 2: all grouped sums are equal to 1 (or extremely close due to floating point errors) + # all(round(id[,.(tst = sum(Adj.proportion.given.line.1)),by=list(Population,L1,iteration)]$tst,13) == 1) + id + + })) +} + + +f_psa_computeWAModelRes <- function(R_table_sub_txts_prop_n_costs, sims, lookups, psa_results, PSA = TRUE) { + + + weighting_table <- sub_tx_PSA_samples( + subsTx_table = data.table(R_table_sub_txts_prop_n_costs), + sims = sims, + lookups = lookups, + PSA = PSA + ) + + lu_pop <- lookups$pop_map + + weighting_table$oo_pop <- lu_pop[match(weighting_table$oo_pop, Sequencing.population.number,nomatch = NA),]$Overall.population.number + + add_pop_2 <- weighting_table[oo_pop==1] + add_pop_2$oo_pop <- 2 + + add_pop_5 <- weighting_table[oo_pop==4] + add_pop_5$oo_pop <- 5 + + weighting_table <- rbind(weighting_table, add_pop_2, add_pop_5) + + # merge in the weightings - note that some rows disappear because there's not a row + # in the weighting table for it! + # + # To show these rows up, use this: + + + res_tab <- + merge.data.table( + weighting_table, + psa_results, + by = c("oo_pop", "L1", "trt_n", "iteration"), + all.y = TRUE + ) + + + res_tab$Adj.proportion.given.line.1[is.na(res_tab$Adj.proportion.given.line.1)] <- 0 + + # QC test: tst should be 1 in the below + # res_tab[,.(tst = sum(Adj.proportion.given.line.1)),by=list(oo_pop,L1,iteration)] + + # EW edit: testing taking into account cPAS vs List price + # res_tab[,.(tst = sum(Adj.proportion.given.line.1)),by=list(oo_pop,L1,iteration,dd_drug_price_options)] + + # Apply the weightings for all iterations all in one go to all the outputs! + res_w <- res_tab[,`:=`( + qaly = qaly * Adj.proportion.given.line.1, + mol_0 = mol_0 * Adj.proportion.given.line.1, + mol_1 = mol_1 * Adj.proportion.given.line.1, + mol_2 = mol_2 * Adj.proportion.given.line.1, + mol_3 = mol_3 * Adj.proportion.given.line.1, + mol_4 = mol_4 * Adj.proportion.given.line.1, + mol_5 = mol_5 * Adj.proportion.given.line.1, + mol_6 = mol_6 * Adj.proportion.given.line.1, + mol_7 = mol_7 * Adj.proportion.given.line.1, + mol_8 = mol_8 * Adj.proportion.given.line.1, + mol_9 = mol_9 * Adj.proportion.given.line.1, + mol_10 = mol_10 * Adj.proportion.given.line.1, + mol_11 = mol_11 * Adj.proportion.given.line.1, + mol_12 = mol_12 * Adj.proportion.given.line.1, + mol_999 = mol_999 * Adj.proportion.given.line.1, + other_costs = other_costs * Adj.proportion.given.line.1, + LY = LY * Adj.proportion.given.line.1 + )] + + # now sum them up: + res_w_sum <- res_w[,.( + qaly = sum(qaly), + mol_0 = sum(mol_0), + mol_1 = sum(mol_1), + mol_2 = sum(mol_2), + mol_3 = sum(mol_3), + mol_4 = sum(mol_4), + mol_5 = sum(mol_5), + mol_6 = sum(mol_6), + mol_7 = sum(mol_7), + mol_8 = sum(mol_8), + mol_9 = sum(mol_9), + mol_10 = sum(mol_10), + mol_11 = sum(mol_11), + mol_12 = sum(mol_12), + mol_999 = sum(mol_999), + other_costs = sum(other_costs), + LY = sum(LY), + Adj.proportion.given.line.1 = sum(Adj.proportion.given.line.1) + ), by = list(oo_pop,L1,iteration, dd_drug_price_options)] + + #NOTE from EW to DL/DB 24-8-23: I've added in dd_drug_price_options here as the + #output from the HPC for the PSA includes this. Let me know if this creates + #problems elsewhere (recommend dd_drug_price_options added in as a column + #in res_tab if so) + + avg_res_w <- res_w_sum[,.( + mean_qaly = mean(qaly), + lb_qaly = quantile(qaly,prob=0.025), + ub_qaly = quantile(qaly,prob=0.975), + mean_mol_0 = mean(mol_0), + lb_mol_0 = quantile(mol_0,prob=0.025), + ub_mol_0 = quantile(mol_0,prob=0.975), + mean_mol_1 = mean(mol_1), + lb_mol_1 = quantile(mol_1,prob=0.025), + ub_mol_1 = quantile(mol_1,prob=0.975), + mean_mol_2 = mean(mol_2), + lb_mol_2 = quantile(mol_2,prob=0.025), + ub_mol_2 = quantile(mol_2,prob=0.975), + mean_mol_3 = mean(mol_3), + lb_mol_3 = quantile(mol_3,prob=0.025), + ub_mol_3 = quantile(mol_3,prob=0.975), + mean_mol_4 = mean(mol_4), + lb_mol_4 = quantile(mol_4,prob=0.025), + ub_mol_4 = quantile(mol_4,prob=0.975), + mean_mol_5 = mean(mol_5), + lb_mol_5 = quantile(mol_5,prob=0.025), + ub_mol_5 = quantile(mol_5,prob=0.975), + mean_mol_6 = mean(mol_6), + lb_mol_6 = quantile(mol_6,prob=0.025), + ub_mol_6 = quantile(mol_6,prob=0.975), + mean_mol_7 = mean(mol_7), + lb_mol_7 = quantile(mol_7,prob=0.025), + ub_mol_7 = quantile(mol_7,prob=0.975), + mean_mol_8 = mean(mol_8), + lb_mol_8 = quantile(mol_8,prob=0.025), + ub_mol_8 = quantile(mol_8,prob=0.975), + mean_mol_9 = mean(mol_9), + lb_mol_9 = quantile(mol_9,prob=0.025), + ub_mol_9 = quantile(mol_9,prob=0.975), + mean_mol_10 = mean(mol_10), + lb_mol_10 = quantile(mol_10,prob=0.025), + ub_mol_10 = quantile(mol_10,prob=0.975), + mean_mol_11 = mean(mol_11), + lb_mol_11 = quantile(mol_11,prob=0.025), + ub_mol_11 = quantile(mol_11,prob=0.975), + mean_mol_12 = mean(mol_12), + lb_mol_12 = quantile(mol_12,prob=0.025), + ub_mol_12 = quantile(mol_12,prob=0.975), + mean_mol_999 = mean(mol_999), + lb_mol_999 = quantile(mol_999,prob=0.025), + ub_mol_999 = quantile(mol_999,prob=0.975), + mean_other_costs = mean(other_costs), + lb_other_costs = quantile(other_costs,prob=0.025), + ub_other_costs = quantile(other_costs,prob=0.975), + mean_LY = mean(LY), + lb_LY = quantile(LY,prob=0.025), + ub_LY = quantile(LY,prob=0.975), + Adj.proportion.given.line.1 = mean(Adj.proportion.given.line.1) + ), by = list(oo_pop,L1, dd_drug_price_options)] + + + avg_res_w <- avg_res_w[Adj.proportion.given.line.1 != 0,] + + return(list( + weightings = weighting_table, + weighted = res_w, + weighted_average = res_w_sum, + mean_weighted_average = avg_res_w + )) + +} + + diff --git a/3_Functions/reporting/empty results doc.docx b/3_Functions/reporting/empty results doc.docx new file mode 100644 index 0000000..605c8dd Binary files /dev/null and b/3_Functions/reporting/empty results doc.docx differ diff --git a/3_Functions/reporting/word_document_output.R b/3_Functions/reporting/word_document_output.R new file mode 100644 index 0000000..afbc1eb --- /dev/null +++ b/3_Functions/reporting/word_document_output.R @@ -0,0 +1,1574 @@ +# output to word final.R +# +# Some edits were made in a temp script and these should be the final code + +#' Overarching function which produces the word document irrespective of model +#' structure. this function routes to either the ST or PS function depending on +#' `model_structure`, expecting the corresponding results set +#' +#' @param p input set for scenario. contains much of the pertinent information. +#' @param res results object. can be loaded directly from results `.rds` file +#' @param Scenario_name Usually taken from the excel book, named range `R_Scenario_name` +#' @param Scenario_number Usually taken from the excel book, named range `R_Scenario_num` +#' @param price_options Usually taken from the excel book, named range `dd_drug_price_options` +#' @param Run_date default `date()`. Allows custom string to be used instead +#' @param word_template_location location of the word document to start from. does not control styles (see `flextable` package). default `./3_Functions/reporting/empty results doc.docx` +#' @param Word_width_inches paragraph width in word document, used for table column distribution. default `29.7*0.3937=11.6721` +#' @param auto_save whether or not to automatically output a word document with automatic naming. if not, the word document object is returned within the R session +#' +#' +f_res_ProduceWordDoc <- function( + p, + res, + Scenario_name, + Scenario_number, + price_options, + Run_date = date(), + word_template_location = "./3_Functions/reporting/empty results doc.docx", + Word_width_inches = 29.7*0.3937, + auto_save = FALSE, + verbose = FALSE +) { + + # preamble + model_structure <- p$basic$structure + lookups <- p$basic$lookup + + for(pkg in c( + "shiny","gtools","openxlsx","flexsurv","tidyverse","data.table","heemod", + "logOfGamma","ggplot2","survminer","officer","officedown","magrittr","Hmisc", + "future.apply","crosstable","flextable","stringr","BCEA","collapse", + "scales","Matrix","dplyr")) {require(pkg,character.only = TRUE)} + + # Word document tempalte to start from: + doc_res <- read_docx(word_template_location) + + # Add a 1st level header for the overall population: + doc_res <- doc_res %>% + body_add_par(paste0("Results of Model Run in R Scenario name: " , Scenario_name),style = "heading 1") %>% + body_add_par(paste0("Date and time run: ", Run_date)) + + # Producing report tables (state transition model) ------------------------------------------------------ + + # Make a word document containing results tables using the object res + + # Produces a different format depending on model structure + + if(model_structure=="State transition") { + doc_res <- f_res_ProduceWordDoc_ST( + doc_res = doc_res, + res = res, + Scenario_name = Scenario_name, + Scenario_number = Scenario_number, + model_structure = model_structure, + pops_to_run = p$basic$pops_to_run, + ptchar = as.data.table(i$R_table_ptchar), + age_sex_source = i$dd_age_sex_source, + patientagesex = i$R_table_patientagesex, + lookups = lookups, + Run_date = Run_date, + Word_width_inches = Word_width_inches, + verbose = verbose + ) + } else { + doc_res <- + f_res_ProduceWordDoc_PS( + doc_res = doc_res, + res = res, + Scenario_name = Scenario_name, + Scenario_number = Scenario_number, + model_structure = model_structure, + pops_to_run = p$basic$pops_to_run, + ptchar = as.data.table(i$R_table_ptchar), + age_sex_source = i$dd_age_sex_source, + patientagesex = i$R_table_patientagesex, + lookups = lookups, + Run_date = Run_date, + Word_width_inches = Word_width_inches, + verbose = verbose + ) + } + + # If the user wants to save the document with automatic naming, then do so. Otherwise + # return the updated doc so the user can save it themselves. + if (auto_save) { + doc_target <- gsub(" ", "_",paste0("./4_Output/Scenario ",Scenario_number,"_",price_options,"_",gsub(":","_",Run_date),".docx")) + print(doc_res, target = doc_target) + rm(doc_res) + cat(paste0("Document automatically saved in location: ", doc_target,"\n")) + return(NULL) + } else { + return(doc_res) + } +} + + + +# Structure specific functions -------------------------------------------- + + +# ~ State transition (ST) ------------------------------------------------- + +#' Function to add results tables specific to cabo nivo no adjuvant population word document output +#' +#' @param doc_res initial document with first header added (see function `f_res_ProduceWordDoc`) +#' @param res results object. can be loaded directly from results `.rds` file +#' @param Scenario_name Usually taken from the excel book, named range `R_Scenario_name` +#' @param Scenario_number Usually taken from the excel book, named range `R_Scenario_num` +#' @param model_structure taken from `p`, in location `p$basic$structure`. Make sure it's correct! +#' @param pops_to_run taken from `p`, in location `p$basic$pops_to_run` +#' @param ptchar taken from `i`, in location `i$R_table_ptchar`, as a data.table. i.e., `as.data.table(i$R_table_ptchar)` +#' @param age_sex_source taken from `i`, in location `i$dd_age_sex_source` +#' @param patientagesex taken from `i`, in location `i$R_table_patientagesex` +#' @param lookups taken from `p` in location `p$basic$lookup` +#' @param Run_date default `date()`. Allows custom string to be used instead. No default as expected to be added in uses of `f_res_ProduceWordDoc` +#' @param Word_width_inches paragraph width in word document, used for table column distribution. No default as expected to be added in uses of `f_res_ProduceWordDoc` +#' +#' +#' +f_res_ProduceWordDoc_ST <- function( + doc_res, + res, + Scenario_name, + Scenario_number, + model_structure, + pops_to_run, + ptchar, + age_sex_source, + patientagesex, + lookups, + Run_date, + Word_width_inches, + verbose = FALSE +) { + + landscape <- prop_section(page_size = page_size(orient = "landscape")) + portrait <- prop_section(page_size = page_size(orient = "landscape")) + + # Shortened loookups for population and molecule + lu_pop <- lookups$pop_map + lu_rpop <- lookups$ipd$pop + lu_mol <- lookups$ipd$mol + + if (verbose) f_misc_colcat(paste0( + "Word output for ", + Scenario_name, + ". (scen #", Scenario_number,"): " + )) + + # make a table by overall population for the summary results + ft_basic_bop <- do.call(rbind, lapply(structure( + names(res$wa_summarised), .Names = names(res$wa_summarised) + ), function(popu_txt) { + popu <- res$wa_summarised[[popu_txt]] + popu_n <- as.numeric(gsub("pop_", "", popu_txt)) + + # seq_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Sequencing.population + rsk_popu_lab <- + lu_pop[Overall.population.number == popu_n,]$Risk.population + + # popu$seq_pop <- seq_popu_lab + popu$risk_pop <- rsk_popu_lab + popu$L1 <- lu_mol[match(popu$L1, lu_mol$Number),]$Description + + return(popu) + + })) + + # Now do the same thing for the incremental analysis + ft_wa_inc <- + do.call(rbind, lapply(structure( + names(res$weighted_incremental), + .Names = names(res$weighted_incremental) + ), function(popu_txt) { + if (is.null(res$weighted_incremental[[popu_txt]]$non_dominated)) { + popu <- as.data.table(res$weighted_incremental[[popu_txt]]) + popu <- + data.table( + popu, + ic = 0, + iq = 0, + il = 0, + ICER = "Dominant" + ) + popu$str_dom <- NULL + + } else { + popu <- + as.data.table(res$weighted_incremental[[popu_txt]]$expanded_results) + popu$ICER[popu$extdom == FALSE] <- + as.character(paste0("£", round(popu$ICER[popu$extdom == FALSE] , 0))) + popu$ICER[popu$extdom == TRUE] <- "(ext dominated)" + popu$str_dom <- NULL + popu$extdom <- NULL + popu$r <- NULL + + } + + popu_n <- as.numeric(gsub("pop_", "", popu_txt)) + + # seq_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Sequencing.population + rsk_popu_lab <- + rep(lu_pop[Overall.population.number == popu_n,]$Risk.population, nrow(popu)) + + + # popu$seq_pop <- seq_popu_lab + popu <- cbind(popu, risk_pop = rsk_popu_lab) + + return(popu) + + })) + + # Create table combining pairwise and incremental ICERs + setDT(ft_basic_bop)[, risk_pop := str_replace(risk_pop, "Int/poor", "Intermediate / poor risk")] + ft_basic_bop <- ft_basic_bop[order(risk_pop, costs)] # order by increasing costs + setDT(ft_wa_inc)[, risk_pop := str_replace(risk_pop, "Int/poor", "Intermediate / poor risk")] + ft_wa_inc <- ft_wa_inc[, c(1, 3, 2, 4, 5, 6, 7, 8, 9)] + ft_pairwise <- do.call(rbind, lapply(structure( + names(res$pairwise_vs_mol), .Names = names(res$pairwise_vs_mol) + ), function(popu_txt) { + popu <- res$pairwise_vs_mol[[popu_txt]] + popu_n <- as.numeric(gsub("pop_", "", popu_txt)) + + # seq_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Sequencing.population + rsk_popu_lab <- + lu_pop[Overall.population.number == popu_n,]$Risk.population + + # popu$seq_pop <- seq_popu_lab + popu$risk_pop <- rsk_popu_lab + + return(popu) + + })) + setDT(ft_pairwise)[, risk_pop := str_replace(risk_pop, "Int/poor", "Intermediate / poor risk")] + ft_pairwise$Pairwise_ICER <- ft_pairwise$icer + ft_pairwise$Pairwise_ICER[is.na(ft_pairwise$icer) != TRUE] <- as.character(paste0("£", round(ft_pairwise$icer[is.na(ft_pairwise$icer) != TRUE] , 0))) + ft_pairwise[ft_pairwise$icer < 0 & ft_pairwise$iq < 0]$Pairwise_ICER <- "Cabo+nivo dominated" + ft_pairwise[ft_pairwise$icer < 0 & ft_pairwise$iq > 0]$Pairwise_ICER <- "Cabo+nivo dominant" + ft_pairwise[ft_pairwise$icer > 0 & ft_pairwise$iq < 0]$Pairwise_ICER <- paste0("SW quadrant ", ft_pairwise[ft_pairwise$icer > 0 & ft_pairwise$iq < 0]$Pairwise_ICER) + ft_pairwise <- ft_pairwise[, .SD, .SDcols = c("L1", "costs", "qalys", "ly", "Pairwise_ICER", "risk_pop")] + ft_wa_inc <- merge(ft_pairwise, ft_wa_inc, all.x = TRUE) + ft_wa_inc <- ft_wa_inc[, c(1, 2, 4, 3, 7, 9, 8, 6, 10, 5)] + ft_wa_inc <- ft_wa_inc[order(risk_pop, costs)] # order by increasing costs + ft_wa_inc[is.na(ICER)]$ICER <- "(dominated)" + + # Pull out nearest comparators + comparator_no_allrisk <- ff_closest_comparator(res, "pop_1") + comparator_no_favrisk <- ff_closest_comparator(res, "pop_2") + comparator_no_IPrisk <- ff_closest_comparator(res, "pop_3") + + + if (verbose) f_misc_colcat(paste0( + "Word output for ", + Scenario_name, + ". (scen #", Scenario_number,"): ", + "LY breakdown" + ),31) + + # Create table for LY breakdown + + # All risk + cabo_nivo_LY_allrisk <- res$weighted_model_undisc$pop_1[L1 == 1] %>% select(starts_with("ly")) + comparator_LY_allrisk <- res$weighted_model_undisc$pop_1[L1 == comparator_no_allrisk] %>% select(starts_with("ly")) + ft_LY_all_tab <- ff_report_outcomes_breakdown( + cabo_nivo_outcomes = cabo_nivo_LY_allrisk, + comparator_outcomes = comparator_LY_allrisk, + comparator_no = comparator_no_allrisk, + LYorQALY = "LY" + ) + + # Fav risk + + cabo_nivo_LY_favrisk <- res$weighted_model_undisc$pop_2[L1 == 1] %>% select(starts_with("ly")) + comparator_LY_favrisk <- res$weighted_model_undisc$pop_2[L1 == comparator_no_favrisk] %>% select(starts_with("ly")) + ft_LY_fav_tab <- ff_report_outcomes_breakdown( + cabo_nivo_outcomes = cabo_nivo_LY_favrisk, + comparator_outcomes = comparator_LY_favrisk, + comparator_no = comparator_no_favrisk, + LYorQALY = "LY" + ) + + # Int/poor risk + cabo_nivo_LY_IPrisk <- res$weighted_model_undisc$pop_3[L1 == 1] %>% select(starts_with("ly")) + comparator_LY_IPrisk <- res$weighted_model_undisc$pop_3[L1 == comparator_no_IPrisk] %>% select(starts_with("ly")) + ft_LY_IP_tab <- ff_report_outcomes_breakdown( + cabo_nivo_outcomes = cabo_nivo_LY_IPrisk, + comparator_outcomes = comparator_LY_IPrisk, + comparator_no = comparator_no_IPrisk, + LYorQALY = "LY" + ) + + if (verbose) f_misc_colcat(paste0( + "Word output for ", + Scenario_name, + ". (scen #", Scenario_number,"): ", + "QALY breakdown" + ),33) + + # Create tables for QALY breakdown + + # All risk + + cabo_nivo_QALY_allrisk <- res$weighted_model_disc$pop_1[L1 == 1] %>% select(starts_with("qaly")) + cabo_nivo_AEQALY_allrisk <- cbind(res$weighted_model_disc$pop_1[L1 == 1] %>% select(starts_with("ae_qaly")),BSC = 0) + cabo_nivo_QALY_allrisk <- colSums(rbind(cabo_nivo_QALY_allrisk, cabo_nivo_AEQALY_allrisk, use.names = FALSE)) + cabo_nivo_QALY_allrisk <- cabo_nivo_QALY_allrisk[c(1:2, 9, 3:8)] + comparator_QALY_allrisk <- res$weighted_model_disc$pop_1[L1 == comparator_no_allrisk] %>% select(starts_with("qaly")) + comparator_AEQALY_allrisk <- cbind(res$weighted_model_disc$pop_1[L1 == comparator_no_allrisk] %>% select(starts_with("ae_qaly")), BSC = 0) + comparator_QALY_allrisk <- colSums(rbind(comparator_QALY_allrisk,comparator_AEQALY_allrisk,use.names = FALSE)) + comparator_QALY_allrisk <- comparator_QALY_allrisk[c(1:2, 9, 3:8)] + ft_QALY_all_tab <- ff_report_outcomes_breakdown( + cabo_nivo_outcomes = cabo_nivo_QALY_allrisk, + comparator_outcomes = comparator_QALY_allrisk, + comparator_no = comparator_no_allrisk, + LYorQALY = "QALY" + ) + + + # Fav risk + cabo_nivo_QALY_favrisk <- res$weighted_model_disc$pop_2[L1 == 1] %>% select(starts_with("qaly")) + cabo_nivo_AEQALY_favrisk <- cbind(res$weighted_model_disc$pop_2[L1 == 1] %>% select(starts_with("ae_qaly")),BSC = 0) + cabo_nivo_QALY_favrisk <- colSums(rbind(cabo_nivo_QALY_favrisk, cabo_nivo_AEQALY_favrisk,use.names = FALSE)) + cabo_nivo_QALY_favrisk <- cabo_nivo_QALY_favrisk[c(1:2, 9, 3:8)] + comparator_QALY_favrisk <- res$weighted_model_disc$pop_2[L1 == comparator_no_favrisk] %>% select(starts_with("qaly")) + comparator_AEQALY_favrisk <- cbind(res$weighted_model_disc$pop_2[L1 == comparator_no_favrisk] %>% select(starts_with("ae_qaly")),BSC = 0) + comparator_QALY_favrisk <- colSums(rbind(comparator_QALY_favrisk,comparator_AEQALY_favrisk,use.names = FALSE)) + comparator_QALY_favrisk <- comparator_QALY_favrisk[c(1:2, 9, 3:8)] + ft_QALY_fav_tab <- ff_report_outcomes_breakdown( + cabo_nivo_outcomes = cabo_nivo_QALY_favrisk, + comparator_outcomes = comparator_QALY_favrisk, + comparator_no = comparator_no_favrisk, + LYorQALY = "QALY" + ) + + + # Int/poor risk + cabo_nivo_QALY_IPrisk <- res$weighted_model_disc$pop_3[L1 == 1] %>% select(starts_with("qaly")) + cabo_nivo_AEQALY_IPrisk <- cbind(res$weighted_model_disc$pop_1[L1 == 1] %>% select(starts_with("ae_qaly")),BSC = 0) + cabo_nivo_QALY_IPrisk <- colSums(rbind(cabo_nivo_QALY_IPrisk, cabo_nivo_AEQALY_IPrisk, use.names =FALSE)) + cabo_nivo_QALY_IPrisk <- cabo_nivo_QALY_IPrisk[c(1:2, 9, 3:8)] + comparator_QALY_IPrisk <- res$weighted_model_disc$pop_3[L1 == comparator_no_IPrisk] %>% select(starts_with("qaly")) + comparator_AEQALY_IPrisk <- cbind(res$weighted_model_disc$pop_3[L1 == comparator_no_IPrisk] %>% select(starts_with("ae_qaly")),BSC = 0) + comparator_QALY_IPrisk <- colSums(rbind(comparator_QALY_IPrisk,comparator_AEQALY_IPrisk,use.names = FALSE)) + comparator_QALY_IPrisk <- comparator_QALY_IPrisk[c(1:2, 9, 3:8)] + ft_QALY_IP_tab <- ff_report_outcomes_breakdown( + cabo_nivo_outcomes = cabo_nivo_QALY_IPrisk, + comparator_outcomes = comparator_QALY_IPrisk, + comparator_no = comparator_no_IPrisk, + LYorQALY = "QALY" + ) + + if (verbose) f_misc_colcat(paste0( + "Word output for ", + Scenario_name, + ". (scen #", Scenario_number,"): ", + "Cost breakdown" + ),34) + + # Create tables for overall cost breakdown + cost_type <- c("drug" , "admin" , "mru" , "eol" , "ae_cost") + populations <- names(res$weighted_model_disc) + summary_costs_table <- + rbindlist(lapply(populations, function(popu) { + treatments <- res$weighted_model_disc[[popu]]$L1 + rbindlist(lapply(treatments, function(mol) { + ff_cost_table( + disc_results = res$weighted_model_disc, + trt_no = mol, + pop = popu + ) + })) + })) + + summary_costs_table[, risk_pop := str_replace(Population, "Int/poor", "Intermediate / poor risk")] + summary_costs_table[, Population := NULL] + summary_costs_table <- summary_costs_table[order(risk_pop, Total)] # order by increasing costs + ft_cost_tab <- summary_costs_table %>% + rename(`Risk population` = risk_pop) %>% + as_grouped_data(groups = "Risk population") %>% + as_flextable() %>% + width(., width = (Word_width_inches / (ncol( + summary_costs_table + )))) %>% + add_header_row( + top = TRUE, + values = c("", "1L costs", "Subsequent treatment", "MRU", "", ""), + colwidths = c(1, 3, 3, 2, 1, 1) + ) %>% + theme_box() |> + set_header_labels( + values = list( + Treatment = "Technologies", + L1_drug = "Drug cost", + L1_admin = "Admin cost", + L1_ae = "AE cost", + subs_drug = "Drug cost", + subs_admin = "Admin cost", + subs_ae = "AE cost", + mru_1L = "1L", + subs_mru = "Subsequent treatment", + eol_cost = "EOL cost", + Total = "Total cost" + ) + ) %>% + colformat_double(j = c(2:11), + digits = 0, + prefix = "£") %>% + add_footer_lines( + "Abbreviations: admin, administration; AE, adverse event; EOL, end of life; MRU, medical resource use" + ) %>% + # add_header_row(colwidths = c(1,1, 2),values = c("","g1", "g2")) |> + bold(bold = TRUE, part = "header") %>% + fontsize(i = NULL, + size = 10, + part = c("header")) %>% + fontsize(i = NULL, + size = 10, + part = c("body")) %>% + fontsize(i = NULL, + size = 9, + part = c("footer")) %>% + align(i = ~ !is.na(`Risk population`), align = "left") %>% + align(i = NULL, + align = "center", + part = c("header")) %>% + bold(i = ~ !is.na(`Risk population`)) %>% + autofit() %>% + set_table_properties(layout = "autofit") + + + # produce break downs by population + intervention_name <- lu_mol[Number == 1]$Description + + # all risk + comparator_name <- lu_mol[Number == comparator_no_allrisk]$Description + cost_breakdown_2 <- rbind( + summary_costs_table[risk_pop == "All risk" & Treatment == intervention_name], + summary_costs_table[risk_pop == "All risk" & Treatment == comparator_name] + ) + + # reshape the data: + cb2 <- melt.data.table(cost_breakdown_2, id.vars = c("Treatment", "risk_pop")) + cb2$risk_pop <- NULL + cb2 <- dcast.data.table(cb2, variable ~ Treatment) + colnames(cb2) <- c("Type", "Int", "Comp") + cb2$Inc <- cb2[, Int] - cb2[, Comp] + cb2$abs <- abs(cb2$Inc) + cb2$abs[10] <- sum(cb2$abs[1:9]) + cb2$abspercent <- cb2$abs / cb2$abs[10] * 100 + cb2[, 1] <- + c( + "Drug acquisition cost (1L)", + "Admin cost (1L)", + "AE cost (1L)", + "Drug acquisition cost (2L+)", + "Admin cost (2L+)", + "AE cost (2L+)", + "MRU 1L", + "MRU 2L+", + "EOL", + "Total" + ) + cost_table_2_allrisk <- ff_cost_byrisk_table(cb2, comparator_no_allrisk) + + # favourable risk + comparator_name <- lu_mol[Number == comparator_no_favrisk]$Description + cost_breakdown_2 <- rbind( + summary_costs_table[risk_pop == "Favourable risk" & Treatment == intervention_name], + summary_costs_table[risk_pop == "Favourable risk" & Treatment == comparator_name] + ) + cost_breakdown_2 <- as.data.table(x = t(cost_breakdown_2),stringsAsFactors = FALSE) + cost_breakdown_2 <- cbind(colnames(summary_costs_table), cost_breakdown_2) + cost_breakdown_2 <- cost_breakdown_2[2:11,] + cost_breakdown_2[, 2:3] <- lapply(cost_breakdown_2[, 2:3], as.numeric) + colnames(cost_breakdown_2) <- c("Type", "Int", "Comp") + cost_breakdown_2$Inc <- cost_breakdown_2[, Int] - cost_breakdown_2[, Comp] + cost_breakdown_2$abs <- abs(cost_breakdown_2$Inc) + cost_breakdown_2$abs[10] <- sum(cost_breakdown_2$abs[1:9]) + cost_breakdown_2$abspercent <- cost_breakdown_2$abs / cost_breakdown_2$abs[10] * 100 + cost_breakdown_2[, 1] <- + c( + "Drug acquisition cost (1L)", + "Admin cost (1L)", + "AE cost (1L)", + "Drug acquisition cost (2L+)", + "Admin cost (2L+)", + "AE cost (2L+)", + "MRU 1L", + "MRU 2L+", + "EOL", + "Total" + ) + cost_table_2_favrisk <- ff_cost_byrisk_table(cost_breakdown_2, comparator_no_favrisk) + + # int / poor risk + comparator_name <- lu_mol[Number == comparator_no_IPrisk]$Description + cost_breakdown_2 <- rbind( + summary_costs_table[risk_pop == "Intermediate / poor risk" & Treatment == intervention_name], + summary_costs_table[risk_pop == "Intermediate / poor risk" & Treatment == comparator_name] + ) + cost_breakdown_2 <- as.data.table(x = t(cost_breakdown_2), stringsAsFactors = FALSE) + cost_breakdown_2 <- cbind(colnames(summary_costs_table), cost_breakdown_2) + cost_breakdown_2 <- cost_breakdown_2[2:11,] + cost_breakdown_2[, 2:3] <- lapply(cost_breakdown_2[, 2:3], as.numeric) + colnames(cost_breakdown_2) <- c("Type", "Int", "Comp") + cost_breakdown_2$Inc <- cost_breakdown_2[, Int] - cost_breakdown_2[, Comp] + cost_breakdown_2$abs <- abs(cost_breakdown_2$Inc) + cost_breakdown_2$abs[10] <- sum(cost_breakdown_2$abs[1:9]) + cost_breakdown_2$abspercent <- cost_breakdown_2$abs / cost_breakdown_2$abs[10] * 100 + cost_breakdown_2[, 1] <- + c( + "Drug acquisition cost (1L)", + "Admin cost (1L)", + "AE cost (1L)", + "Drug acquisition cost (2L+)", + "Admin cost (2L+)", + "AE cost (2L+)", + "MRU 1L", + "MRU 2L+", + "EOL", + "Total" + ) + cost_table_2_IPrisk <- ff_cost_byrisk_table(cost_breakdown_2, comparator_no_IPrisk) + + #### Scenario analysis tables + if (verbose) f_misc_colcat(paste0( + "Word output for ", + Scenario_name, + ". (scen #", Scenario_number,"): ", + "Scenario tables" + ),35) + + # all risk + Scenario_table <- ff_scenario_output(res,Scenario_name,comparator_no_allrisk,"pop_1",model_structure) + Scenario_table_allrisk <- ff_scenario_table(Scenario_table) + + # favourable risk + Scenario_table <- ff_scenario_output(res,Scenario_name,comparator_no_favrisk,"pop_2",model_structure) + Scenario_table_favrisk <- ff_scenario_table(Scenario_table) + + # int/poor risk + Scenario_table <- ff_scenario_output(res,Scenario_name,comparator_no_IPrisk,"pop_3",model_structure) + Scenario_table_IPrisk <- ff_scenario_table(Scenario_table) + + # base case table + ft_basecase <- ft_wa_inc %>% + rename(`Risk population` = risk_pop) %>% + as_grouped_data(groups = "Risk population") %>% + as_flextable() %>% + width(., width = (Word_width_inches / (ncol(ft_wa_inc)))) %>% + theme_box() |> + set_header_labels( + values = list( + L1 = "Technologies", + costs = "Costs (£)", + ly = "LYG", + qalys = "QALYs", + ic = "Inc. Costs", + il = "Inc. LYG", + iq = "Inc. QALYs", + Pairwise_ICER = "ICER cabo + nivo vs comparator", + ICER = "ICER incremental" + ) + ) %>% + flextable::colformat_double(j = c(2, 5, 8, 9), + digits = 0, + prefix = "£") %>% + flextable::colformat_double(j = c(3, 4, 6, 7), digits = 2) %>% + add_footer_lines( + "Abbreviations: ICER, incremental cost-effectiveness ratio; LYG, life-years gained; QALY, quality-adjusted life-year" + ) %>% + # add_header_row(colwidths = c(1,1, 2),values = c("","g1", "g2")) |> + bold(bold = TRUE, part = "header") %>% + fontsize(i = NULL, + size = 10, + part = c("header")) %>% + fontsize(i = NULL, + size = 10, + part = c("body")) %>% + fontsize(i = NULL, + size = 9, + part = c("footer")) %>% + align(i = ~ !is.na(`Risk population`), align = "left") %>% + bold(i = ~ !is.na(`Risk population`)) + + + if (verbose) f_misc_colcat(paste0( + "Word output for ", + Scenario_name, + ". (scen #", Scenario_number,"): ", + "Severity modifier" + ),36) + + # Severity modifier + if(p$basic$decision_problem == "cabo+nivo") {pops_to_run <- 1:3} else {pops_to_run <- 1:3} + + population_numbers <- if(sum(pops_to_run == 1:3)>0){1:3} else{1:6} + res$mk$qaly_shortfall_1_to_3 <- lapply(population_numbers, function(npa_pop) { + + # npa_pop is overall population, we need to look up risk population from it: + + risk_pop_n <- lu_pop[match(npa_pop,lu_pop$Overall.population.number),]$Risk.population.number + risk_pop <- lu_rpop[match(risk_pop_n,lu_rpop$Number),]$Description + + if (age_sex_source == "Mean") { + + # So for this risk population, we need the baseline characteristics: + bl_chars <- ptchar[Population == risk_pop & Treatment.line == 1,] + bl_age <- bl_chars$Starting.age..years..Mean + bl_male <- 1-bl_chars$Starting...female.Mean + + } else { + + patient_sex_age_IPD <- as.data.table(patientagesex) + patient_sex_age_IPD$Gender <- replace(patient_sex_age_IPD$Gender, patient_sex_age_IPD$Gender=="M","male") + patient_sex_age_IPD$Gender <- replace(patient_sex_age_IPD$Gender, patient_sex_age_IPD$Gender=="F","female") + + bl_age <- patient_sex_age_IPD[Line ==1]$Age + bl_male <- patient_sex_age_IPD[Line ==1]$Gender + + } + + pna_txt <- names(res$wa_summarised)[npa_pop] + + tab <- res$wa_summarised[[pna_txt]][L1 != 1,] + + met <- tab[which.max(qalys),] + + q_met <- met$qalys + comp_no_met <- met$L1 + + out <- calc_severity_modifier( + age = bl_age, + sex = bl_male, + .patient_level = if(age_sex_source == "Mean") {FALSE} else {TRUE}, + qalys = q_met, + .i = i, + .p = p + ) + + out <- cbind(out, SOC = comp_no_met) + + return(out) + + }) + + + severity_table <- data.table(do.call(rbind, res$mk$qaly_shortfall_1_to_3)) + severity_table <- + cbind(risk_pop = lu_pop$Risk.population[1:3], severity_table) + severity_table <- rbind( + severity_table, + f_res_cabonivo_SevMod( + res = res, + oo_pop_string = "Poor / intermediate risk", + pop_n = 3, + comp_numb = 5 + ) + ) + severity_table <- rbind( + severity_table, + f_res_cabonivo_SevMod( + res = res, + oo_pop_string = "Poor / intermediate risk", + pop_n = 3, + comp_numb = 8 + ) + ) + setDT(severity_table)[, risk_pop := str_replace(risk_pop, "Favourable risk", "Fav")] + setDT(severity_table)[, risk_pop := str_replace(risk_pop, "All risk", "All")] + severity_table$SOC <- + unlist(lapply(1:nrow(severity_table), function(mol) { + lu_mol[Number == severity_table$SOC[mol]]$Description + })) + ft_severity_mod <- ff_severity_table(severity_table) + + if (verbose) f_misc_colcat(paste0( + "Word output for ", + Scenario_name, + ". (scen #", Scenario_number,"): ", + "Pairwise results" + ),37) + + # Scenario analysis pairwise results + ft_all_pairwise <- + do.call(rbind, lapply(structure( + names(res$pairwise_vs_mol), .Names = names(res$pairwise_vs_mol) + ), function(popu_txt) { + popu <- res$pairwise_vs_mol[[popu_txt]] + popu_n <- as.numeric(gsub("pop_", "", popu_txt)) + + # seq_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Sequencing.population + rsk_popu_lab <- + lu_pop[Overall.population.number == popu_n,]$Risk.population + + # popu$seq_pop <- seq_popu_lab + popu$risk_pop <- rsk_popu_lab + + return(popu) + + })) + + ft_all_pairwise$ICER[is.na(ft_all_pairwise$icer) != TRUE] <- + as.character(paste0("£", round(ft_all_pairwise$icer[is.na(ft_all_pairwise$icer) != TRUE] , 0))) + ft_all_pairwise[ft_all_pairwise$icer < 0 & + ft_all_pairwise$iq < 0]$ICER <- + "Cabo+nivo dominated" + ft_all_pairwise[ft_all_pairwise$icer < 0 & + ft_all_pairwise$iq > 0]$ICER <- + "Cabo+nivo dominant" + ft_all_pairwise[ft_all_pairwise$icer > 0 & + ft_all_pairwise$iq < 0]$ICER <- + paste0("SW quadrant ", ft_all_pairwise[ft_all_pairwise$icer > 0 & + ft_all_pairwise$iq < 0]$ICER) + ft_all_pairwise[, icer := NULL] + setDT(ft_all_pairwise)[, risk_pop := str_replace(risk_pop, "Int/poor", "Intermediate / poor risk")] + ft_all_pairwise_tab <- ff_scenario_pairwise_table(ft_all_pairwise, Word_width_inches) + + + # Outputting report (state transition) ------------------------------------------------------ + + if (verbose) f_misc_colcat(paste0( + "Word output for ", + Scenario_name, + ". (scen #", Scenario_number,"): ", + "Generating word document from results..." + ),40) + + # Add base case results. + doc_res <- doc_res %>% + body_add_table_legend(paste0("Base-case results (ordered in increasing cost)"), + bookmark = "tab1") %>% + body_add_flextable(ft_basecase, + align = "left", + topcaption = TRUE, + split = TRUE) %>% + + body_add_break() + + + doc_res <- body_end_section_landscape(doc_res) + doc_res <- doc_res %>% + body_add_par("Qualification for the severity modifier", style = "heading 2") %>% + body_add_table_legend(paste0("Application of the severity modifier to the base case"), + bookmark = "tab2") %>% + body_add_flextable( + ft_severity_mod, + align = "left", + topcaption = TRUE, + split = TRUE + ) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_par("Breakdowns by health state and cost category", style = "heading 2") %>% + body_add_table_legend( + paste0( + "Summary of LY gain by health state (all risk, cabo+nivo vs next best non-dominated comparator: " , + lu_mol[Number == comparator_no_allrisk]$Description, + ")" + ), + bookmark = "tab3" + ) %>% + body_add_flextable( + ft_LY_all_tab, + align = "left", + topcaption = TRUE, + split = TRUE + ) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend( + paste0( + "Summary of LY gain by health state (favourable risk, cabo+nivo vs next best non-dominated comparator: " , + lu_mol[Number == comparator_no_favrisk]$Description, + ")" + ), + bookmark = "tab4" + ) %>% + body_add_flextable( + ft_LY_fav_tab, + align = "left", + topcaption = TRUE, + split = TRUE + ) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend( + paste0( + "Summary of LY gain by health state (intermediate / poor risk, cabo+nivo vs next best non-dominated comparator: " , + lu_mol[Number == comparator_no_IPrisk]$Description, + ")" + ), + bookmark = "tab5" + ) %>% + body_add_flextable(ft_LY_IP_tab, + align = "left", + topcaption = TRUE, + split = TRUE) %>% + body_add_break() + + + doc_res <- doc_res %>% + body_add_table_legend( + paste0( + "Summary of QALY gain by health state (all risk, cabo+nivo vs next best non-dominated comparator: " , + lu_mol[Number == comparator_no_allrisk]$Description, + ")" + ), + bookmark = "tab1" + ) %>% + body_add_flextable( + ft_QALY_all_tab, + align = "left", + topcaption = TRUE, + split = TRUE + ) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend( + paste0( + "Summary of QALY gain by health state (favourable risk, cabo+nivo vs next best non-dominated comparator: " , + lu_mol[Number == comparator_no_favrisk]$Description, + ")" + ), + bookmark = "tab6" + ) %>% + body_add_flextable( + ft_QALY_fav_tab, + align = "left", + topcaption = TRUE, + split = TRUE + ) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend( + paste0( + "Summary of QALY gain by health state (intermediate / poor risk, cabo+nivo vs next best non-dominated comparator: " , + lu_mol[Number == comparator_no_IPrisk]$Description, + ")" + ), + bookmark = "tab7" + ) %>% + body_add_flextable( + ft_QALY_IP_tab, + align = "left", + topcaption = TRUE, + split = TRUE + ) %>% + body_add_break() + + + doc_res <- body_end_section_portrait(doc_res) + + doc_res <- doc_res %>% + body_add_table_legend(paste0("Summary of costs by health state"), + bookmark = "tab8") %>% + body_add_flextable(ft_cost_tab, + align = "left", + topcaption = TRUE, + split = TRUE) %>% + body_add_break() + + doc_res <- body_end_section_landscape(doc_res) + + doc_res <- doc_res %>% + body_add_table_legend( + paste0( + "Summary of predicted resource use by category of cost (all risk, cabo+nivo vs next best non-dominated comparator: " , + lu_mol[Number == comparator_no_allrisk]$Description, + ")" + ), + bookmark = "tab1" + ) %>% + body_add_flextable( + cost_table_2_allrisk , + align = "left", + topcaption = TRUE, + split = TRUE + ) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend( + paste0( + "Summary of predicted resource use by category of cost (favourable risk, cabo+nivo vs next best non-dominated comparator: " , + lu_mol[Number == comparator_no_favrisk]$Description, + ")" + ), + bookmark = "tab9" + ) %>% + body_add_flextable( + cost_table_2_favrisk , + align = "left", + topcaption = TRUE, + split = TRUE + ) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend( + paste0( + "Summary of predicted resource use by category of cost (intermediate / poor risk, cabo+nivo vs next best non-dominated comparator: " , + lu_mol[Number == comparator_no_IPrisk]$Description, + ")" + ), + bookmark = "tab10" + ) %>% + body_add_flextable( + cost_table_2_IPrisk , + align = "left", + topcaption = TRUE, + split = TRUE + ) %>% + body_add_break() + + + population_table <- + as.data.table(cbind( + risk_pop = lu_pop$Risk.population[1:3], + pop_labels = c("pop_1", "pop_2", "pop_3") + )) + setDT(population_table)[, risk_pop := str_replace(risk_pop, "Int/poor", "Intermediate / poor risk")] + + + + for (popu in population_table$pop_labels) { + for (mol in names(res$weighted_trace_plots[[popu]]$plots)) { + doc_res <- doc_res %>% body_add_figure_legend( + legend = paste0( + "Markov trace: ", + population_table[pop_labels == popu]$risk_pop , + ", ", + lu_mol[Number == str_sub(mol, -1, -1)]$Description + ), + bookmark = "fig1" + ) %>% + body_add_plot(print(res$weighted_trace_plots[[popu]]$plots[mol]), width = 6) %>% + body_add_par( + paste0( + "Abbreviations: L1, 1st line; L2, 2nd line; L3, 3rd line; L4, 4th line; L5, 5th line" + ), + style = "Table footnote" + ) %>% body_add_break() + + } + } + + doc_res <- doc_res %>% body_add_break() %>% + body_add_par("Cost-effectiveness acceptability frontiers", style = "heading 2") %>% + body_add_par( + paste0( + "Cost-effectiveness acceptability frontiers are presented for all non-dominated treatments for each of the risk groups" + ) + ) %>% + body_add_break() %>% + body_add_figure_legend( + legend = paste0("Cost-effectiveness acceptability frontier – all risk"), + bookmark = "fig2" + ) %>% + body_add_plot(print(res$weighted_incremental$pop_1$p), height = 4) %>% + body_add_par(paste0("Abbreviations: QALYs, quality-adjusted life-years"), + style = "Table footnote") + + doc_res <- doc_res %>% + body_add_figure_legend( + legend = paste0("Cost-effectiveness acceptability frontier – favourable risk"), + bookmark = "fig3" + ) %>% + body_add_plot(print(res$weighted_incremental$pop_2$p), height = 4) %>% + body_add_par(paste0("Abbreviations: QALYs, quality-adjusted life-years"), + style = "Table footnote") + + doc_res <- doc_res %>% + body_add_figure_legend( + legend = paste0( + "Cost-effectiveness acceptability frontier – intermediate / poor risk" + ), + bookmark = "fig4" + ) %>% + body_add_plot(print(res$weighted_incremental$pop_3$p), height = 4) %>% + body_add_par(paste0("Abbreviations: QALYs, quality-adjusted life-years"), + style = "Table footnote") %>% + + body_add_break() + + doc_res <- body_end_section_portrait(doc_res) + + + doc_res <- doc_res %>% + body_add_par("Scenario analysis style tables", style = "heading 1") %>% + body_add_table_legend(legend = paste0("Scenario analysis - all risk"), + bookmark = "tab11") %>% + body_add_flextable( + Scenario_table_allrisk , + align = "left", + topcaption = TRUE, + split = TRUE + ) %>% + + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend(legend = paste0("Scenario analysis - favourable risk"), + bookmark = "tab12") %>% + body_add_flextable( + Scenario_table_favrisk , + align = "left", + topcaption = TRUE, + split = TRUE + ) %>% + + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend( + legend = paste0("Scenario analysis - intermediate / poor risk"), + bookmark = "tab13" + ) %>% + body_add_flextable( + Scenario_table_IPrisk , + align = "left", + topcaption = TRUE, + split = TRUE + ) %>% + + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend( + legend = paste0("Scenario analysis pairwise comparison table"), + bookmark = "tab14" + ) %>% + body_add_flextable( + ft_all_pairwise_tab , + align = "left", + topcaption = TRUE, + split = TRUE + ) %>% + body_add_break() + + + # return the updated document: + return(doc_res) +} + + + +# ~ Partitioned survival (PS) --------------------------------------------- + +#' Function to add results tables specific to cabo nivo no adjuvant population word document output +#' +#' @param doc_res initial document with first header added (see function `f_res_ProduceWordDoc`) +#' @param res results object. can be loaded directly from results `.rds` file +#' @param Scenario_name Usually taken from the excel book, named range `R_Scenario_name` +#' @param Scenario_number Usually taken from the excel book, named range `R_Scenario_num` +#' @param model_structure taken from `p`, in location `p$basic$structure`. Make sure it's correct! +#' @param pops_to_run taken from `p`, in location `p$basic$pops_to_run` +#' @param ptchar taken from `i`, in location `i$R_table_ptchar`, as a data.table. i.e., `as.data.table(i$R_table_ptchar)` +#' @param age_sex_source taken from `i`, in location `i$dd_age_sex_source` +#' @param patientagesex taken from `i`, in location `i$R_table_patientagesex` +#' @param lookups taken from `p` in location `p$basic$lookup` +#' @param Run_date default `date()`. Allows custom string to be used instead. No default as expected to be added in uses of `f_res_ProduceWordDoc` +#' @param Word_width_inches paragraph width in word document, used for table column distribution. No default as expected to be added in uses of `f_res_ProduceWordDoc` +#' +#' +f_res_ProduceWordDoc_PS <- function( + doc_res, + res, + Scenario_name, + Scenario_number, + model_structure, + pops_to_run, + ptchar, + age_sex_source, + patientagesex, + lookups, + Run_date, + Word_width_inches, + verbose = FALSE +) { + + + landscape <- prop_section(page_size = page_size(orient = "landscape")) + portrait <- prop_section(page_size = page_size(orient = "landscape")) + + # Shortened loookups for population and molecule + lu_pop <- lookups$pop_map + lu_mol <- lookups$ipd$mol + + # Producing report tables (PartSA) ------------------------------------------------------ + + if (verbose) f_misc_colcat(paste0( + "Word output for ", + Scenario_name, + ". (scen #", Scenario_number,"): " + )) + + # Make LY table + + if (verbose) f_misc_colcat(paste0( + "Word output for ", + Scenario_name, + ". (scen #", Scenario_number,"): ", + "LYs" + ),col_num = 31) + + PartSA_Lys <- + do.call(rbind, lapply(structure(names(res$ly), .Names = names(res$ly)), function(popu_txt) { + popu <- as.data.table(res$ly[[popu_txt]]) + popu_n <- as.numeric(gsub("pop_", "", popu_txt)) + + # seq_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Sequencing.population + rsk_popu_lab <- rep(lu_pop[Overall.population.number == popu_n, ]$Risk.population, nrow(popu)) + popu <- data.table(L1 = rownames(res$ly[[popu_txt]]), popu) + popu$L1 <- + lu_mol[match(popu$L1, lu_mol$RCC_input_desc), ]$Description + + # popu$seq_pop <- seq_popu_lab + popu <- cbind(popu, risk_pop = rsk_popu_lab) + + return(popu) + + })) + + setDT(PartSA_Lys)[, risk_pop := str_replace(risk_pop, "Int/poor", "Intermediate / poor risk")] + PartSA_Lys$Total <- rowSums(PartSA_Lys[, 2:5]) + + PartSA_LYs_table <- ff_PartSALY_table(PartSA_Lys) + + # Make QALYs table + + if (verbose) f_misc_colcat(paste0( + "Word output for ", + Scenario_name, + ". (scen #", Scenario_number,"): ", + "QALYs" + ),col_num = 33) + + PartSA_QALYs <- + do.call(rbind, lapply(structure( + names(res$disc_qaly), .Names = names(res$disc_qaly) + ), function(popu_txt) { + popu <- as.data.table(res$disc_qaly[[popu_txt]]) + popu_n <- as.numeric(gsub("pop_", "", popu_txt)) + + # seq_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Sequencing.population + rsk_popu_lab <- + rep(lu_pop[Overall.population.number == popu_n, ]$Risk.population, nrow(popu)) + popu <- + data.table(L1 = rownames(res$disc_qaly[[popu_txt]]), popu) + popu$L1 <- + lu_mol[match(popu$L1, lu_mol$RCC_input_desc), ]$Description + + # popu$seq_pop <- seq_popu_lab + popu <- cbind(popu, risk_pop = rsk_popu_lab) + + return(popu) + + })) + + setDT(PartSA_QALYs)[, risk_pop := str_replace(risk_pop, "Int/poor", "Intermediate / poor risk")] + PartSA_QALYs$Total <- rowSums(PartSA_QALYs[, 2:5]) + + PartSA_QALYs_table <- ff_PartSAQALY_table(PartSA_QALYs) + + # Make costs table + + if (verbose) f_misc_colcat(paste0( + "Word output for ", + Scenario_name, + ". (scen #", Scenario_number,"): ", + "Costs" + ),col_num = 34) + + PartSA_costs <- + do.call(rbind, lapply(structure( + names(res$disc_cost), .Names = names(res$disc_cost) + ), function(popu_txt) { + popu <- as.data.table(res$disc_cost[[popu_txt]]) + popu_n <- as.numeric(gsub("pop_", "", popu_txt)) + + # seq_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Sequencing.population + rsk_popu_lab <- + rep(lu_pop[Overall.population.number == popu_n, ]$Risk.population, nrow(popu)) + popu <- + data.table(L1 = rownames(res$disc_cost[[popu_txt]]), popu) + popu$L1 <- + lu_mol[match(popu$L1, lu_mol$RCC_input_desc), ]$Description + + # popu$seq_pop <- seq_popu_lab + popu <- cbind(popu, risk_pop = rsk_popu_lab) + + return(popu) + + })) + + setDT(PartSA_costs)[, risk_pop := str_replace(risk_pop, "Int/poor", "Intermediate / poor risk")] + PartSA_costs$Total <- rowSums(PartSA_costs[, 2:11]) + + PartSA_costs_table <- ff_PartSAcost_table (PartSA_costs) + + # Make results table + + if (verbose) f_misc_colcat(paste0( + "Word output for ", + Scenario_name, + ". (scen #", Scenario_number,"): ", + "Results tables" + ),col_num = 35) + + PartSA_wa <- + do.call(rbind, lapply(structure( + names(res$incremental), .Names = names(res$incremental) + ), function(popu_txt) { + if (is.null(res$incremental[[popu_txt]]$non_dominated)) { + popu <- as.data.table(res$incremental[[popu_txt]]) + popu <- + data.table( + popu, + ic = 0, + iq = 0, + il = 0, + ICER = "Dominant" + ) + popu$str_dom <- NULL + + } else { + popu <- as.data.table(res$incremental[[popu_txt]]$expanded_results) + popu$ICER[popu$extdom == FALSE] <- + as.character(paste0("£", round(popu$ICER[popu$extdom == FALSE] , 0))) + popu$ICER[popu$extdom == TRUE] <- "(ext dominated)" + popu$str_dom <- NULL + popu$extdom <- NULL + popu$r <- NULL + + } + + popu_n <- as.numeric(gsub("pop_", "", popu_txt)) + + # seq_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Sequencing.population + rsk_popu_lab <- + rep(lu_pop[Overall.population.number == popu_n, ]$Risk.population, nrow(popu)) + + + # popu$seq_pop <- seq_popu_lab + popu <- cbind(popu, risk_pop = rsk_popu_lab) + + return(popu) + + })) + + PartSA_wa <- PartSA_wa[, c(2, 3, 1, 4, 5, 7, 6, 8, 9)] + + + + PartSA_totals <- + do.call(rbind, lapply(structure( + names(res$tables$top_line), .Names = names(res$tables$top_line) + ), function(popu_txt) { + popu <- as.data.table(res$tables$top_line[[popu_txt]]) + popu_n <- as.numeric(gsub("pop_", "", popu_txt)) + + # seq_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Sequencing.population + rsk_popu_lab <- + rep(lu_pop[Overall.population.number == popu_n, ]$Risk.population, nrow(popu)) + popu$L1 <- lu_mol[match(popu$L1, lu_mol$Number), ]$Description + + # popu$seq_pop <- seq_popu_lab + popu <- cbind(popu, risk_pop = rsk_popu_lab) + + return(popu) + + })) + + PartSA_totals <- PartSA_totals[order(risk_pop, costs)] # order by increasing costs + + PartSA_totals$L1_risk <- paste(PartSA_totals$L1, PartSA_totals$risk_pop) + PartSA_wa$L1_risk <- paste(PartSA_wa$L1, PartSA_wa$risk_pop) + + PartSA_results <- merge(PartSA_totals, PartSA_wa, all.x = TRUE) + PartSA_results[is.na(ICER)]$ICER <- "(dominated)" + + PartSA_results <- PartSA_results[, c(4, 1, 3, 2, 7, 8, 9, 10, 5)] + + PartSA_results <- + PartSA_results[order(risk_pop, costs)] # order by increasing costs + + + PartSA_Pairwise <- + do.call(rbind, lapply(structure( + names(res$tables$top_line), .Names = names(res$tables$top_line) + ), function(popu_txt) { + popu <- f_res_ICER_pairwiseVsoneTrt(res$tables$top_line[[popu_txt]], 1, lu_mol) + popu_n <- as.numeric(gsub("pop_", "", popu_txt)) + + # seq_popu_lab <- lu_pop[Overall.population.number == popu_n,]$Sequencing.population + rsk_popu_lab <- rep(lu_pop[Overall.population.number == popu_n, ]$Risk.population, nrow(popu)) + + # popu$seq_pop <- seq_popu_lab + popu <- cbind(popu, risk_pop = rsk_popu_lab) + + return(popu) + + })) + + + PartSA_Pairwise$Pairwise_ICER[is.na(PartSA_Pairwise$icer) != TRUE] <- as.character(paste0( + "£", + round(PartSA_Pairwise$icer[is.na(PartSA_Pairwise$icer) != TRUE] , 0)) + ) + + + PartSA_Pairwise[PartSA_Pairwise$icer < 0 & PartSA_Pairwise$iq < 0]$Pairwise_ICER <- "Cabo+nivo dominated" + PartSA_Pairwise[PartSA_Pairwise$icer < 0 & PartSA_Pairwise$iq > 0]$Pairwise_ICER <- "Cabo+nivo dominant" + PartSA_Pairwise[PartSA_Pairwise$icer > 0 & PartSA_Pairwise$iq < 0]$Pairwise_ICER <- paste0( + "SW quadrant ", + PartSA_Pairwise[PartSA_Pairwise$icer > 0 & PartSA_Pairwise$iq < 0]$Pairwise_ICER + ) + + PartSA_Pairwise_Scen <- PartSA_Pairwise + PartSA_Pairwise <- PartSA_Pairwise[, c(4, 9, 10)] + + + PartSA_results <- merge(PartSA_results, PartSA_Pairwise, all.x = TRUE) + + PartSA_results <- PartSA_results[, c(1:8, 10, 9)] + + PartSA_results[ICER == 0]$ICER <- "(dominated)" + PartSA_results[, 6] <- as.numeric(unlist(PartSA_results[, 6][[1]])) + PartSA_results[, 7] <- as.numeric(unlist(PartSA_results[, 7][[1]])) + PartSA_results[, 8] <- as.numeric(unlist(PartSA_results[, 8][[1]])) + + PartSA_results <- PartSA_results[order(risk_pop, costs)] # order by increasing costs + setDT(PartSA_results)[, risk_pop := str_replace(risk_pop, "Int/poor", "Intermediate / poor risk")] + + PartSA_results_tab <- ff_PartSAresults_table(PartSA_results) + + #### Scenario analysis tables + comparator_no_allrisk <- ff_closest_comparator_PartSA(res, "pop_1") + comparator_no_favrisk <- ff_closest_comparator_PartSA(res, "pop_2") + comparator_no_IPrisk <- ff_closest_comparator_PartSA(res, "pop_3") + + # all risk + Scenario_table <- ff_scenario_output(res,Scenario_name,comparator_no_allrisk,"pop_1",model_structure) + Scenario_table_allrisk <- ff_scenario_table(Scenario_table) + + # favourable risk + Scenario_table <-ff_scenario_output(res,Scenario_name,comparator_no_favrisk,"pop_2",model_structure) + Scenario_table_favrisk <- ff_scenario_table(Scenario_table) + + # int/poor risk + + Scenario_table <- ff_scenario_output(res,Scenario_name,comparator_no_IPrisk,"pop_3",model_structure) + Scenario_table_IPrisk <- ff_scenario_table(Scenario_table) + + # Scenario analysis pairwise results + + setDT(PartSA_Pairwise_Scen)[, risk_pop := str_replace(risk_pop, "Int/poor", "Intermediate / poor risk")] + + PartSA_Pairwise_Scen <- PartSA_Pairwise_Scen[, c(4, 1, 2, 3, 5, 6, 7, 10, 9)] + PartSA_Pairwise_Scen$ICER <- PartSA_Pairwise_Scen$Pairwise_ICER + PartSA_Pairwise_Scen$Pairwise_ICER <- NULL + + ft_all_pairwise_tab <- ff_scenario_pairwise_table(PartSA_Pairwise_Scen, Word_width_inches) + + + # Outputting report (PartSA) ------------------------------------------------------ + + if (verbose) f_misc_colcat(paste0( + "Word output for ", + Scenario_name, + ". (scen #", Scenario_number,"): ", + "Generating word document..." + ),col_num = 36) + + doc_res <- doc_res %>% + body_add_table_legend(paste0("PartSA life years"), + bookmark = "tab1") %>% + body_add_flextable( + PartSA_LYs_table, + align = "left", + topcaption = TRUE, + split = TRUE + ) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend(paste0("PartSA QALYs"), + bookmark = "tab2") %>% + body_add_flextable( + PartSA_QALYs_table, + align = "left", + topcaption = TRUE, + split = TRUE + ) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend(paste0("PartSA costs"), + bookmark = "tab3") %>% + body_add_flextable( + PartSA_costs_table, + align = "left", + topcaption = TRUE, + split = TRUE + ) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend(paste0("PartSA results (ordered in increasing cost)"), + bookmark = "tab4") %>% + body_add_flextable( + PartSA_results_tab, + align = "left", + topcaption = TRUE, + split = TRUE + ) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_par("Scenario analysis style table", style = "heading 1") %>% + body_add_table_legend(legend = paste0("Scenario analysis style table - all risk"), + bookmark = "tab5") %>% + body_add_flextable( + Scenario_table_allrisk , + align = "left", + topcaption = TRUE, + split = TRUE + ) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend( + legend = paste0("Scenario analysis style table - favourable risk"), + bookmark = "tab6" + ) %>% + body_add_flextable( + Scenario_table_favrisk , + align = "left", + topcaption = TRUE, + split = TRUE + ) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend( + legend = paste0("Scenario analysis style table - intermediate / poor risk"), + bookmark = "tab7" + ) %>% + body_add_flextable( + Scenario_table_IPrisk , + align = "left", + topcaption = TRUE, + split = TRUE + ) %>% + body_add_break() + + doc_res <- doc_res %>% + body_add_table_legend( + legend = paste0("Scenario analysis pairwise comparison table"), + bookmark = "tab8" + ) %>% + body_add_flextable( + ft_all_pairwise_tab , + align = "left", + topcaption = TRUE, + split = TRUE + ) %>% + body_add_break() + + return(doc_res) + +} + + + +# TESTING AREA ------------------------------------------------------------ + + +# Testing area, set to not run but can be used to assign variables and run through +# the function definition line by line: + +if (FALSE) { + # Load all libraries + library(shiny, quiet = TRUE) + library(gtools, quiet = TRUE) + library(openxlsx, quiet = TRUE) + library(flexsurv, quiet = TRUE) + library(tidyverse, quiet = TRUE) + library(data.table, quiet = TRUE) + library(heemod, quiet = TRUE) + library(logOfGamma, quiet = TRUE) + library(ggplot2, quiet = TRUE) + library(survminer, quiet = TRUE) + library(officer, quiet = TRUE) + library(officedown, quiet = TRUE) + library(magrittr, quiet = TRUE) + library(Hmisc, quiet = TRUE) + library(future.apply, quiet = TRUE) + library(crosstable, quiet = TRUE) + library(flextable, quiet = TRUE) + library(stringr, quiet = TRUE) + library(BCEA, quiet = TRUE) + library(collapse, quiet = TRUE) + library(scales, quiet = TRUE) + library(Matrix, quiet = TRUE) + library(dplyr, quiet = TRUE) + + # Load all functions: + source("./3_Functions/excel/extract.R") + source("./3_Functions/sequencing/sequences.R") + source("./3_Functions/survival/Survival_functions.R") + source("./3_Functions/survival/other_cause_mortality.R") + source("./3_Functions/survival/treatment_effect_waning.R") + source("./3_Functions/misc/other.R") + source("./3_Functions/misc/shift_and_pad.R") + source("./3_Functions/misc/cleaning.R") + source("./3_Functions/misc/nesting.R") + source("./3_Functions/misc/discounting.R") + source("./3_Functions/misc/qdirichlet.R") + source("./3_Functions/misc/plotting.R") + source("./3_Functions/misc/structure.R") + source("./3_Functions/utility/age_related.R") + source("./3_Functions/costs_and_QALYs/utility_processing.R") + source("./3_Functions/adverse_events/AE_steps.R") + source("./3_Functions/costs_and_QALYs/cost_processing.R") + source("./3_Functions/markov/markov.R") + source("./3_Functions/patient_flow/overarching.R") + source("./3_Functions/patient_flow/partitioned_survival.R") + source("./3_Functions/patient_flow/markov.R") + source("./3_Functions/patient_flow/drug_costs.R") + source("./3_Functions/patient_flow/hcru_costs.R") + source("./3_Functions/patient_flow/qalys.R") + source("./3_Functions/patient_flow/ae.R") + source("./3_Functions/results/incremental_analysis.R") + source("./3_Functions/results/model_averaging.R") + source("./3_Functions/results/partitioned_survival.R") + source("./3_Functions/misc/severity_modifier.R") + source("./3_Functions/results/results_tables.R") + + # Load required variables from file: + i <- readRDS("./2_Scripts/standalone scripts/QC/i.rds") + p <- readRDS("./2_Scripts/standalone scripts/QC/p.rds") + res <- readRDS("~/Downloads/PATT-Pathways-RCC/4_Output/results_scenario_RevisedBC_2Aug2023.rds") + + # Test the function + f_res_ProduceWordDoc( + p = p, + res = res, + Scenario_name = "Test scenario (base-case)", + Scenario_number = i$R_Scenario_num, + price_options = i$dd_drug_price_options, + Run_date = date(), + word_template_location = "./3_Functions/reporting/empty results doc.docx", + Word_width_inches = 29.7*0.3937, + auto_save = TRUE + ) +} + + diff --git a/3_Functions/results/incremental_analysis.R b/3_Functions/results/incremental_analysis.R new file mode 100644 index 0000000..7f6a5ee --- /dev/null +++ b/3_Functions/results/incremental_analysis.R @@ -0,0 +1,225 @@ +#' Function to produce incremental analysis results +#' +#' @param res_tab the results from the markov model (e.g. res$mk$disc$pop_3$res) +#' @param produce_plot whether or not to produce an efficiency frontier plot +#' +#' +#' @details The plot has a lot of dots on it so sequences are numbered. the numbers +#' correspond to the column `r` in the expanded_results table. one can take the +#' plot and change the labelling or produce a separate plot from the various tables +#' returned. +#' +#' +f_res_mk_incremental <- function(res_d, res_ud,lu_mol, produce_plot = TRUE, no_active_lines = NULL, output_weighted = "No") { + + out <- list() + + # Sort by cost in descending order: + if ("trt" %in% colnames(res_ud)) { + ru <- res_ud[,-c("costs","qalys","trt")] + } else { + ru <- res_ud[,-c("costs","qalys")] + } + r <- merge.data.table(res_d,ru) + r <- r[order(costs),] + + # Translate molecule number to full name to make the output more readable. + # Only do this when we're looking at 1st line treatments (weighted models): + if ("L1" %in% colnames(r)) { + r$L1 <- lu_mol[match(r$L1, lu_mol$Number)]$Description + } else { + r$L1 <- gsub("→","|",r$trt_n) + } + + # Filter according to the number of active lines + if(output_weighted == "No") { + if (!is.null(no_active_lines)) { + r <- r[str_count(r$trt_n, pattern = "→")<(no_active_lines+1)] + } + } + + # Identify strictly dominated strategies, i.e. for each row are there any + # other rows which have both lower costs and higher QALYs + r$str_dom <- FALSE + + # If for any treatment a worse treatment is more expensive, then it's dominated strictly + + str_dom_list <- lapply(nrow(r):1, function(tx) { + + c <- r$costs[tx] + q <- r$qalys[tx] + + if(tx == nrow(r)) { + return(q < max(r$qalys)) + } else { + dat <- r[-tx,] + + dat$t_c <- dat$costs <= c + dat$t_q <- dat$qalys >= q + + dat$tt <- dat$t_c & dat$t_q + + return(any(dat$tt)) + } + }) + + r$str_dom <- unlist(str_dom_list)[nrow(r):1] + + + if(sum(!r$str_dom) == 1) { + r <- r[str_dom == FALSE,] + warning(paste0("Molecule ", r$L1, " as first line therapy strictly dominates all other strategies...")) + return(r) + } + + # not strictly dominated strategies + stdom <- r[str_dom == TRUE,] + nsddom <- r[str_dom == FALSE,] + + + # Extended dominance is iterative by nature, requiring pairwise ICERs to be + # calcualted repeatedly after eliminating strategies one at a time: + + nsddom$extdom <- FALSE + nsddom$ic <- 0 + nsddom$iq <- 0 + nsddom$il <- 0 + nsddom$ICER <- 0 + nsddom$ICER[length(nsddom$ICER)] <- NA + nsddom <- nsddom[order(qalys),] + nsddom$r <- 1:nrow(nsddom) + + out$not_strictly_dominated <- nsddom + + # Order checks for QC - they all go in increasing order in cost and qalys (otherwise + # some would be strdom by a cheaper and better strat) + + extdom <- Reduce( + x = 1:10, + accumulate = FALSE, + init = nsddom, + function(not_strictly_dominated, dos) { + + + after_this_pass <- Reduce( + x = 1:(nrow(not_strictly_dominated)-1), + init = not_strictly_dominated, + accumulate = FALSE, + f = function(prev, comp_tx) { + + + + # For each strategy, calculate the lowest ICER compared to any other + # which is more effective than it is, which should be the rows below it. + # this can be done in reverse order cycling through interventions from the + # bottom of the table upwards, calculating the lowest ICER to any other strategy + # that at the time isn't dominated. + + # If this strategy is extendedly dominated, just return the table as we + # don't need to do anything further + if (prev$extdom[comp_tx] == TRUE) return(prev) + + ned <- prev[extdom == FALSE & r %in% comp_tx:nrow(prev),] + + # Pull out the comparator: + comp <- prev[r == comp_tx,] + + # Calculate pairwise ICERs against all cheaper and less effective treatments + # compared to this one, that are not themselves dominated: + ic <- ned[r > comp_tx]$costs - comp$costs + iq <- ned[r > comp_tx]$qalys - comp$qalys + il <- ned[r > comp_tx]$ly - comp$ly + + ICERs <- ic/iq + + ned$ic <- c(NA,ic) + ned$iq <- c(NA,iq) + ned$il <- c(NA,il) + ned$ICER <- c(NA,ICERs) + which_best_ICER <- ned[which.min(ICER),]$r + + # Locate the best ICER (efficiency frontier) for this iteration + ned$best_ICER <- FALSE + ned[r == which_best_ICER,]$best_ICER <- TRUE + + # Figure out if there is any extended dominance to add to the table + # prev$testboi <- between(prev$r,which_best_ICER,int_tx-1,incbounds = FALSE) + if (which_best_ICER > (comp_tx+1)) { + prev$extdom[data.table::between(prev$r,comp_tx,which_best_ICER,incbounds = FALSE)] <- TRUE + prev$ic[data.table::between(prev$r,comp_tx,which_best_ICER,incbounds = FALSE)] <- NA + prev$iq[data.table::between(prev$r,comp_tx,which_best_ICER,incbounds = FALSE)] <- NA + prev$il[data.table::between(prev$r,comp_tx,which_best_ICER,incbounds = FALSE)] <- NA + prev$ICER[data.table::between(prev$r,comp_tx,which_best_ICER,incbounds = FALSE)] <- NA + if (which_best_ICER + 1 < nrow(prev)) { + prev$ICER[(which_best_ICER+1):nrow(prev)] <- NA + } + prev[r == which_best_ICER,]$ic <- ned[best_ICER == TRUE,]$ic + prev[r == which_best_ICER,]$iq <- ned[best_ICER == TRUE,]$iq + prev[r == which_best_ICER,]$il <- ned[best_ICER == TRUE,]$il + prev[r == which_best_ICER,]$ICER <- ned[best_ICER == TRUE,]$ICER + } else { + # it's just the next treatment along in the reduced table. there's no + # extended dominance to add + prev[r == which_best_ICER,]$ic <- ned[r == which_best_ICER,]$ic + prev[r == which_best_ICER,]$iq <- ned[r == which_best_ICER,]$iq + prev[r == which_best_ICER,]$il <- ned[r == which_best_ICER,]$il + prev[r == which_best_ICER,]$ICER <- ned[r == which_best_ICER,]$ICER + } + return(prev) + }) + + return(after_this_pass) + } + ) + + out$expanded_results <- extdom + + # Non-dominated strategies and ICERs between them + reduced_table <- extdom[extdom == FALSE,] + + out$non_dominated <- reduced_table[,-c("str_dom","extdom","r")] + + if (produce_plot) { + out$p <- ggplot(nsddom, aes(x = qalys, y = costs, colour = as.factor(L1), label = r)) + + geom_point() + + theme_classic() + + # ggrepel::geom_text_repel(max.overlaps = 100, alpha = 0.2) + + geom_line(data = reduced_table, aes(x=qalys,y=costs,colour=NULL)) + + # ggrepel::geom_label_repel( + # data = reduced_table, + # # arrow = arrow(ends = "last",type = "closed"), + # aes( + # x = qalys, + # y = costs, + # colour = NULL, + # label = as.factor(L1), + # ))+ + theme(legend.position = "bottom") + + scale_x_continuous(limits = c(0,max(nsddom$qalys)), expand = expansion(mult = c(0,0.05))) + + scale_y_continuous( + limits = c(0, max(nsddom$costs)), + expand = expansion(mult = c(0, 0.05)), + labels = label_dollar(prefix = "£") + ) + + labs(x= "QALYs", y = "Costs") + + theme(legend.title = element_blank()) + } + + return(out) +} + + + + + +# testing ground ---------------------------------------------------------- + +if (FALSE) { + f_res_mk_incremental(res_tab = res$mk$disc$pop_1$res) + f_res_mk_incremental(res$mk$disc$pop_2$res) + f_res_mk_incremental(res$mk$disc$pop_3$res) + f_res_mk_incremental(res$mk$disc$pop_4$res) + f_res_mk_incremental(res$mk$disc$pop_5$res) + f_res_mk_incremental(res$mk$disc$pop_6$res) +} + diff --git a/3_Functions/results/model_averaging.R b/3_Functions/results/model_averaging.R new file mode 100644 index 0000000..1e63535 --- /dev/null +++ b/3_Functions/results/model_averaging.R @@ -0,0 +1,534 @@ +#' Function to generate weighted average cost and QALY breakdown per OVERALL +#' population. +#' +#' +#' @param res_obj Model results from the ST model, usually `res$mk`, but if loaded in via rds file, then that. +#' @param pop_oo numeric of length one. overall_pop corresponding to the lookup table `p$basic$lookup$pop_map` +#' @param subs `p$costs$settings$subsTx`, which is named range `R_table_sub_txts_prop_n_costs` from Excel (containing the weightings) +#' @param ptchar named range `R_table_ptchar` from excel (patient characteristics) +#' @param disc logical. discounted or undiscounted results +#' @param lookups usually `p$basic$lookup` containing all the lookup tables to translate numerical id's into labels +#' @param `no_active_lines` number of active lines coming from the Excel input +#' @param `max_lines` maximum number of lines including BSC, set to 5 by default +#' +f_res_wa_model <- + function(res_obj, + pop_oo, + subs, + ptchar = NULL, + disc, + lookups, + no_active_lines, + max_lines) { + + stopifnot(no_active_lines %in% 2:4) + + # Lookup tables: + lu_mol <- lookups$ipd$mol + lu_pop <- lookups$pop_map + + # Weighting table: + # The subsTx table only needs to be computed once, so just get it done: + subs$L1 <- lu_mol[match(subs$Line.1,RCC_input_desc,nomatch = NA),]$Number + subs$L2 <- lu_mol[match(subs$Line.2,RCC_input_desc,nomatch = NA),]$Number + subs$L3 <- lu_mol[match(subs$Line.3,RCC_input_desc,nomatch = NA),]$Number + subs$L4 <- lu_mol[match(subs$Line.4,RCC_input_desc,nomatch = NA),]$Number + subs$L5 <- 999 + subs <- + subs[!is.na(Population), list( + Population, + L1, + L2, + L3, + L4, + L5, + Adj.proportion.given.line.1 + )] + + # Checking the maximum number of active lines from the Excel input file and creating a subsetted filed which sums up the number receiving each possible pathway + # after the maximum line and assigns it to the prior treatment + + if (no_active_lines ==3 ) { + new_subs <- subs[, sum(Adj.proportion.given.line.1), by = .(Population, L1, L2, L3)] + colnames(new_subs)[colnames(new_subs) == 'V1'] <- "Adj.proportion.given.line.1" + new_subs <- cbind(new_subs, L4 = rep("NA", nrow(new_subs)), L5 = rep(999, nrow(new_subs))) + + } + if (no_active_lines ==2 ) { + new_subs <- subs[, sum(Adj.proportion.given.line.1), by = .(Population, L1, L2)] + colnames(new_subs)[colnames(new_subs) == 'V1'] <- "Adj.proportion.given.line.1" + new_subs <- cbind(new_subs, L3 = rep("NA", nrow(new_subs)),L4 = rep("NA", nrow(new_subs)), L5 = rep(999, nrow(new_subs))) + + } + + + # Compile trt_n to match with the breakdown table + subs$trt_n <- do.call(paste, c(subs[,paste0("L",1:max_lines),with=FALSE], sep="→")) + + + # now we have a unique identified assigned for each sequence assign the new values taking into account the maximum number of lines + + if (no_active_lines <4) { + new_subs$trt_n <- do.call(paste, c(new_subs[, paste0("L", 1:max_lines), with = FALSE], sep ="→")) + subs$Adj.proportion.given.line.1 <- new_subs$Adj.proportion.given.line.1[match(subs$trt_n, new_subs$trt_n)] + subs$Adj.proportion.given.line.1[is.na(subs$Adj.proportion.given.line.1)] <- 0 + } + + subs$trt_n <- gsub("→NA","",subs$trt_n) + subs <- subs[,list(Population,L1,trt_n,Adj.proportion.given.line.1)] + + # Finally, filter down to only the overall population we're doing + pop_s_n <- lu_pop[match(pop_oo, lu_pop$Overall.population.number)]$Sequencing.population.number + + subs <- subs[Population == paste0("pop",pop_s_n),] + + # Moving onto the results to proces for all sequences for this population: + stopifnot(disc %in% c(TRUE,FALSE)) + + # Determine which objects to pull out of the results object (discounted or undisc) + if (disc) { + d <- "disc" + } else { + d <- "undisc" + res_ly <- res_obj[[d]][[paste0("pop_",pop_oo)]]$ly$breakdown + } + + # Pull out the full breakdown table for this OVERALL population, which has + # outcomes by line and category for costs and qalys + res_fbd <- res_obj[[d]][[paste0("pop_",pop_oo)]]$full_breakdowns + + + # work out max dimensions for individual breakdown tables: + max_rows_c <- max_lines + max_rows_q <- (max_lines)*2 + + + # cycle through the full breakdown applying weightings, then pivoting the tables + # wider. It should end up 1 row data.table with trt_n as one column. + # (fbdt = full breakdown table) + fbdt <- rbindlist( + lapply(res_fbd, function(tx) { + tx_li <- nrow(tx$cost) + + # expand tx$cost to all possible treatment lines if required + if (tx_li < max_rows_c) { + tx$cost <- rbind(tx$cost, + matrix( + rep(0, ncol(tx$cost) * (max_rows_c - tx_li)), + ncol = ncol(tx$cost), + nrow = max_rows_c - tx_li, + dimnames = list(paste0("line_", (nrow( + tx$cost + ) + 1):( + nrow(tx$cost) + (max_rows_c - tx_li) + )), colnames(tx$cost)) + )) + eol_temp <- max(tx$cost[,"eol"]) + tx$cost[,"eol"] <- c(rep(0,max_rows_c-1),eol_temp) + } + + # Do the same for QALYs, which has 2 for each active treatment line + # + BSC + dead + tx_liq <- nrow(tx$qaly) + if (tx_liq < max_rows_q) { + rows_to_add <- max_rows_q - tx_liq + n_lines <- (nrow(tx$qaly) - 2) / 2 + + # names for extra columns + li_txt <- unlist(lapply(paste0("L", (n_lines + 1):(max_lines-1)), function(x) + paste0(x, c("_on", "_off")))) + + tx$qaly <- rbind(tx$qaly, + matrix( + data = rep(0, length(li_txt) * ncol(tx$qaly)), + ncol = ncol(tx$qaly), + nrow = length(li_txt), + dimnames = list(li_txt, colnames(tx$qaly)) + )) + + tx$qaly <- tx$qaly[c( + rownames(tx$qaly)[grep("L", rownames(tx$qaly))], + rownames(tx$qaly)[grep("L", rownames(tx$qaly), invert = TRUE)]), + ] + } + + # Now the tables have been padded out, we can multiply by the weighting + # and spread them wide: + line_id <- structure(1:nrow(tx$cost),.Names=rownames(tx$cost)) + + tx$cost <- do.call(cbind,lapply(line_id, function(tx_line) { + dat <- t(tx$cost[paste0("line_",tx_line),]) + colnames(dat) <- paste0(colnames(dat),"_L",tx_line) + return(dat) + })) + + qaly_index <- structure(1:nrow(tx$qaly),.Names=rownames(tx$qaly)) + + tx$qaly <- do.call(cbind,lapply(1:length(qaly_index), function(cata) { + state_name <- names(qaly_index)[cata] + dat <- t(tx$qaly[state_name,]) + colnames(dat) <- paste0(colnames(dat),"_",state_name) + return(dat) + })) + + return(data.table(trt_n = tx$n,trt = tx$txt,tx$cost,tx$qaly)) + + }) + ) + + + # apply the weightings and sum up by first-line therapy: + # Sum up by first line therapy: + + subs$Population<- NULL + if (disc == FALSE) { + + # If undiscounted then add in the additional columns for state residency: + + res_ly <- res_ly[,-"trt"] + ly_cols <- colnames(res_ly) + ly_cols <- ly_cols[!ly_cols %in% c("trt_n")] + + res_lym <- merge.data.table( + subs, + res_ly, + by="trt_n" + ) + + res_lym <- as.data.frame(res_lym) + res_lym[,ly_cols] <- res_lym[,ly_cols] * res_lym$Adj.proportion.given.line.1 + res_lym <- as.data.table(res_lym) + + res_lym[is.na(res_lym)] <- 0 + res_lym[,(ly_cols) := lapply(.SD, sum),.SDcols = ly_cols,by="L1"] + res_lym <- res_lym[,head(.SD,1),by="L1"] + res_lym <- res_lym[,-c("Adj.proportion.given.line.1","trt_n")] + + colnames(res_lym)[2:length(colnames(res_lym))] <- paste0("ly_",colnames(res_lym)[2:length(colnames(res_lym))]) + } + + # subs$Adj.proportion.given.line.1<- NULL + # Merge + + final_table <- merge.data.table( + subs, + fbdt, + by = "trt_n" + ) + + cols <- colnames(final_table) + cols <- cols[!cols %in% c("trt_n", "L1", "Adj.proportion.given.line.1", "trt")] + + # apply the weightings all at once: + + final_table <- as.data.frame(final_table) + final_table[,cols] <- final_table[,cols] * final_table$Adj.proportion.given.line.1 + final_table <- as.data.table(final_table) + + # add everything up by first line treatment + + final_table[, (cols) := lapply(.SD, sum), .SDcols = cols, by = "L1"] + final_table <- final_table[,head(.SD,1),by="L1"] + + final_table$trt_n <- NULL + final_table$trt <- NULL + final_table$Adj.proportion.given.line.1 <- NULL + + cols_to_drop <- c(paste0("eol_L",1:4),"ae_qaly_BSC","qaly_dead", "ae_qaly_dead") + final_table[,(cols_to_drop) := NULL] + + if (disc) { + return(final_table) + } else { + return(merge.data.table(final_table,res_lym,by = "L1")) + } + +} + + + +#' WORK IN PROGRESS - NOT USED CURRENTLY. Weighting by prior adjuvant. Requires the same treatments to be avaialable +#' in both populations as it produces a weighted average table +f_res_wam_prior_adjuvant <- function(lookups, char, wam_disc, wam_undisc) { + + # lookup tables: + lu_pop <- lookups$pop_map + lu_rpop <- lookups$ipd$pop + + # cycling index: + rpop_labs <- structure( + sort(unique(p$basic$lookup$pop_map$Risk.population.number)), + .Names = paste0("risk_pop_",sort(unique(p$basic$lookup$pop_map$Risk.population.number))) + ) + + # Cycle through risk pops, calculating weighted average breakdown tables: + lapply(rpop_labs, function(risk_pop) { + + char$rpop <- lu_rpop[match(char$Population,lu_rpop$Description),]$Number + char <- char[Treatment.line == 1 & rpop==risk_pop,] + + # pull out our weighting for prior (N.B. this is 0 in base-case) + w_prior <- char$Prior.IO...in.12.months.Mean + + # our risk population is then our lu_pop for this risk population: + tab_rpop <- lu_pop[Risk.population.number == risk_pop,] + + # We then pull the OVERALL populations from the weighted table, weighting the + # outcomes further: + pops_oo <- tab_rpop$Overall.population.number + + # pull out discounted and undiscounted + disc <- wam_disc[pops_oo] + undisc <- wam_undisc[pops_oo] + + # Determine which of the populations is the no prior adjuvant one + # in case different ones are in a different order from each other: + npa_which <- grep("no prior adjuvant",tab_rpop$Sequencing.population) + + # Make a separate number for each one so we know which is our no prior and which + # is our prior + pa_i <- pops_oo[-npa_which] + npa_i <- pops_oo[npa_which] + + # Discounted weighted table: + wa_disc <- (w_prior * disc[[paste0("pop_",pa_i)]]) + ((1-w_prior)*disc[[paste0("pop_",npa_i)]]) + wa_undisc <- (w_prior * undisc[[paste0("pop_",pa_i)]]) + ((1-w_prior)*undisc[[paste0("pop_",npa_i)]]) + + return(list( + disc = wa_disc, + undisc = wa_undisc + )) + }) +} + + +#' quick function to add up stuff in the weighted model tables: +f_res_sum_weighted_model <- function(rd, rud ) { + + # Make some flags (bits of strings inside columns) + cflag <- c("drug", "admin", "mru", "ae_cost", "eol") + qflag <- c("qaly_") + lyflag <- c("ly_") + + rd$costs <- rowSums(dplyr::select(rd,contains(cflag))) + rd$qalys <- rowSums(dplyr::select(rd,contains(qflag))) + ly <- dplyr::select(rud,contains(lyflag)) + rd$ly <- rowSums(dplyr::select(ly,!contains(qflag,))) + # make a results table and return that + return(rd[,list(L1,costs,qalys,ly)]) +} + + + +#' Function to generate weighted traces by first-line therapy received according to the +#' weightings provided in the excel named range `R_table_sub_txts_prop_n_costs`. +#' +#' @param pf_list_pop the patient flow for this markov run for this OVERALL population +#' @param oo_pop_n numeric for the overall population (1-6) +#' @param subs `p$costs$settings$subsTx`, which is named range `R_table_sub_txts_prop_n_costs` from Excel (containing the weightings) +#' @param lookups lookup list from `p$basic$lookup` +#' @param max_lines maximum number of treatment lines, used to pad out smaller traces. +#' +f_pf_wa_sr_plots <- function(pf_list_pop, oo_pop_n, subs, lookups, max_lines, no_active_lines) { + + # Lookup tables: + lu_mol <- lookups$ipd$mol + lu_pop <- lookups$pop_map + + # Weighting table: + # The subsTx table only needs to be computed once, so just get it done: + subs$L1 <- lu_mol[match(subs$Line.1,RCC_input_desc,nomatch = NA),]$Number + subs$L2 <- lu_mol[match(subs$Line.2,RCC_input_desc,nomatch = NA),]$Number + subs$L3 <- lu_mol[match(subs$Line.3,RCC_input_desc,nomatch = NA),]$Number + subs$L4 <- lu_mol[match(subs$Line.4,RCC_input_desc,nomatch = NA),]$Number + subs$L5 <- 999 + subs <- + subs[!is.na(Population), list( + Population, + L1, + L2, + L3, + L4, + L5, + Adj.proportion.given.line.1 + )] + + if (no_active_lines ==3 ) { + new_subs <- subs[, sum(Adj.proportion.given.line.1), by = .(Population, L1, L2, L3)] + colnames(new_subs)[colnames(new_subs) == 'V1'] <- "Adj.proportion.given.line.1" + new_subs <- cbind(new_subs, L4 = rep("NA", nrow(new_subs)), L5 = rep(999, nrow(new_subs))) + + } + if (no_active_lines ==2 ) { + new_subs <- subs[, sum(Adj.proportion.given.line.1), by = .(Population, L1, L2)] + colnames(new_subs)[colnames(new_subs) == 'V1'] <- "Adj.proportion.given.line.1" + new_subs <- cbind(new_subs, L3 = rep("NA", nrow(new_subs)),L4 = rep("NA", nrow(new_subs)), L5 = rep(999, nrow(new_subs))) + + } + + + # Compile trt_n to match with the breakdown table + subs$trt_n <- do.call(paste, c(subs[,paste0("L",1:max_lines),with=FALSE], sep="→")) + + + # now we have a unique identified assigned for each sequence assign the new values taking into account the maximum number of lines + + if (no_active_lines <4 ) { + new_subs$trt_n <- do.call(paste, c(new_subs[,paste0("L",1:max_lines),with=FALSE], sep="→")) + subs$Adj.proportion.given.line.1 <- new_subs$Adj.proportion.given.line.1[match(subs$trt_n, new_subs$trt_n)] + subs$Adj.proportion.given.line.1[is.na(subs$Adj.proportion.given.line.1)] <- 0 + } + + + subs$trt_n <- gsub("→NA","",subs$trt_n) + subs <- subs[,list(Population,L1,trt_n,Adj.proportion.given.line.1)] + + # Finally, filter down to only the overall population we're doing + pop_s_n <- lu_pop[match(oo_pop_n, lu_pop$Overall.population.number)]$Sequencing.population.number + subs <- subs[Population == paste0("pop",pop_s_n),] + + + # now, we need to "flatten" the list, as it's currently broken up into + sequences_flattened <- Reduce( + x = 1:length(pf_list_pop), + init = list(), + accumulate = FALSE, + f = function(prev, n_lines) { + if (n_lines == 1) { + prev <- pf_list_pop[[n_lines]] + return(prev) + } else { + sequences <- pf_list_pop[[n_lines]] + starting_n <- sum(unlist(lapply(pf_list_pop[1:(n_lines-1)],length))) + 1 + len <- length(sequences) + names(sequences) <- paste0("seq_",starting_n:(starting_n+len-1)) + prev <- c(prev,sequences) + return(prev) + } + } + ) + + # Now we need to translate the treatment names into numerical versions, which match + # the subs table. we can then filter the subs table down to the row in question to + # get the weighting and compute a weighted average consolidated trace. + sequences_flattened <- lapply(sequences_flattened, function(this_seq) { + this_seq$trt_n <- paste(lu_mol[match(this_seq$trt_nam,Description,nomatch = NA),]$Number,collapse = "→") + this_seq$L1 <- lu_mol[match(this_seq$trt_nam[1],Description,nomatch = NA),]$Number + + tl <- length(this_seq$trt_nam) + atl <- tl - 1 + ncol_actual <- ncol(this_seq$trace_consol) + ncol_target <- (max_lines * 2) + + # if we need to pad with empty columns: + if (ncol_actual < ncol_target) { + lines_to_add <- (ncol_target - ncol_actual)/2 + line_txt <- paste0("L",((atl+1):(4))) + line_txt <- unlist(lapply(line_txt, function(x) paste0(x,c("_on", "_off")))) + empty_matrix <- matrix( + data = rep(0,length(line_txt)*nrow(this_seq$trace_consol)), + nrow = nrow(this_seq$trace_consol), + ncol = length(line_txt), + dimnames = list(NULL,line_txt) + ) + this_seq$trace_consol <- as.data.table(cbind(this_seq$trace_consol,empty_matrix)) + setcolorder( + this_seq$trace_consol, + c("L1_on","L1_off","L2_on","L2_off","L3_on","L3_off","L4_on","L4_off","BSC","dead") + ) + this_seq$trace_consol <- as.matrix(this_seq$trace_consol) + } + + return(list( + trt_nam = this_seq$trt_nam, + trt_n = this_seq$trt_n, + L1 = this_seq$L1, + trace_consol = this_seq$trace_consol + )) + }) + + # So now we have a "full" consolidated trace for each possible treatment pathway. + # We can now simply weight each one according to its corresponding weighting in + # subs, and sum the results by first line treatment! + w_seq <- lapply(sequences_flattened, function(this_sequence) { + + # filter down subs: + weighting <- subs[trt_n == this_sequence$trt_n,] + + stopifnot(nrow(weighting) %in% c(0,1)) + + if (nrow(weighting) == 0) { + out <- this_sequence$trace_consol * 0 + } else if (nrow(weighting) == 1) { + w <- weighting$Adj.proportion.given.line.1 + out <- this_sequence$trace_consol * w + } + + return(list( + trt_n = this_sequence$trt_n, + L1 = this_sequence$L1, + trace_weighted = out + )) + }) + + # With the weighted sequences grouping by first-line treatment (i.e. L1), + # add up the traces. + L1 <- unique(unlist(lapply(w_seq, function(x) x$L1),use.names = F)) + tab_L1 <- table(unlist(lapply(w_seq, function(x) x$L1),use.names = F)) + mat_dim <- dim(w_seq[[1]]$trace_weighted) + state_nam <- colnames(w_seq[[1]]$trace_weighted) + + L1_trt_pos_lab <- structure(1:length(L1), .Names = paste0("L1_",L1)) + + # Produce weighted average consolidated trace lines per first line therapy + weighted_traces <- lapply(L1_trt_pos_lab, function(L1_trt) { + + # Start from an empty matrix, and then cycle through all of the weighted + # traces. if L1 matches L1_trt, add it to the pile, if not don't. + empty_mat <- matrix( + data = 0, + nrow = mat_dim[1], + ncol = mat_dim[2], + dimnames = list(NULL,state_nam) + ) + + # Starting from an empty matrix + Reduce( + x = 1:length(w_seq), + init = empty_mat, + accumulate = FALSE, + f = function(prev, trt_seq_n) { + tr <- w_seq[[trt_seq_n]] + if (tr$L1 == L1[L1_trt]) { + prev <- prev + tr$trace_weighted + return(prev) + } else { + return(prev) + } + } + ) + + }) + + # produce plots for each of these traces + plot_list <- lapply(weighted_traces, function(first_line_trt) { + f_plot_mk_draw_consol_trace(consol_trace = first_line_trt, + treatment_names = paste0("L",1:5), + tmax = 15) + }) + + undiscounted_tis <- do.call(rbind,lapply(weighted_traces,function(x) colSums(x)/52.17857)) + + undiscounted_tis <- data.table(rownames_to_column(as.data.frame(undiscounted_tis),"L1")) + + undiscounted_tis$L1 <- gsub("L1_","",undiscounted_tis$L1) + + # Return a list of traces, plots, and lifetime outcomes: + return(list( + traces = weighted_traces, + plots = plot_list, + tis = undiscounted_tis + )) + +} + diff --git a/3_Functions/results/partitioned_survival.R b/3_Functions/results/partitioned_survival.R new file mode 100644 index 0000000..7332abe --- /dev/null +++ b/3_Functions/results/partitioned_survival.R @@ -0,0 +1,102 @@ +#' Produce a summary of the PS model +#' +#' @param pf_ps patient flow output for the PS mdoel +#' @param lookups the lookup tables i.e., `p$basic$lookup` +#' @param top_line only top line results or full results. should be TRUE in the PSA. +#' +f_res_summary_ps <- function(pf_ps,lookups, top_line = FALSE) { + + lu_mol <- lookups$ipd$mol + + # Produce a breakdown table for discounted everything except for life years(undiscounted) + bdt <- lapply(pf, function(popu) { + # popu <- pf$pop_1 + rbindlist( + lapply(popu, function(L1_treatment) { + # L1_treatment <- popu[[1]] + + lys <- as.data.table(L1_treatment$lys) + setnames(lys,c("PFS_on","PFS_off","PPS_on","PPS_off"),c("ly_PFS_on","ly_PFS_off","ly_PPS_on","ly_PPS_off")) + qalys <- as.data.table(L1_treatment$qalys$disc) + setnames(qalys,c("PFS","PPS","AE","AE_PPS"),c("qaly_PFS","qaly_PPS","qaly_AE","qaly_AE_PPS")) + costs <- as.data.table(L1_treatment$costs$disc) + setnames(costs,c("drug", + "admin", + "AE", + "substrt_drug_cost", + "substrt_admin_cost", + "substrt_AE_cost", + "mru_preprog", + "mru_postprog", + "EOL_cost", + "prog_cost"),c("cost_drug", + "cost_admin", + "cost_AE", + "cost_substrt_drug", + "cost_substrt_admin", + "cost_substrt_AE", + "cost_mru_preprog", + "cost_mru_postprog", + "cost_EOL", + "cost_prog")) + + return(data.table(t(colSums(data.table(lys,qalys,costs))))) + }) + ) + }) + + # summarise the breakdown table into final results (tl = top-line) + tl <- lapply(bdt, function(popu) { + + # add up some columns: + popu[, `:=`( + costs = cost_drug + + cost_admin + + cost_AE + + cost_substrt_drug + + cost_substrt_admin + + cost_substrt_AE + + cost_mru_preprog + + cost_mru_postprog + + cost_EOL + + cost_prog, + qalys = qaly_PFS + + qaly_PPS + + qaly_AE + + qaly_AE_PPS, + lys = ly_PFS_on + + ly_PFS_off + + ly_PPS_on + + ly_PPS_off + )] + + # get just summary columns: + return(popu[,list(costs,qalys,lys)]) + + }) + + # Get the 1L therapies for each + trts <- lapply(pf, names) + trt_n <- lapply(trts, function(pop_oo) {lu_mol[match(pop_oo,RCC_input_desc),]$Number}) + + bdt <- lapply(structure(1:length(bdt),.Names = names(bdt)), function(pop_n){ + dat <- bdt[[pop_n]] + dat$L1 <- trt_n[[pop_n]] + return(dat) + }) + tl <- lapply(structure(1:length(tl),.Names = names(tl)), function(pop_n){ + dat <- tl[[pop_n]] + dat$L1 <- trt_n[[pop_n]] + return(dat) + }) + + if(top_line) { + return(tl) + } else { + return(list( + breakdown = bdt, + top_line = tl + )) + } + +} \ No newline at end of file diff --git a/3_Functions/results/results_tables.R b/3_Functions/results/results_tables.R new file mode 100644 index 0000000..85af263 --- /dev/null +++ b/3_Functions/results/results_tables.R @@ -0,0 +1,980 @@ +#' function to compute pairwise ICERs against a specific treatment. expects +#' the model-averaged 1L treatment sequencing results. +#' +#' @param tab `res$mk$wa_summarised` for a specific population (top line table with L1 costs qalys ly columns) +#' @param int_L1 the treatment to treat as the "intervention" to calculate pairwise ICERs +#' @param lookup the lookup tables, usually `p$basic$lookup` +#' +f_res_ICER_pairwiseVsoneTrt <- function(tab, int_L1, lookup) { + + int <- tab[L1 == int_L1,] + + comp <- tab[L1 != int_L1,] + + ICERs <- rbindlist( + lapply(1:nrow(comp), function(comparator) { + out <- list() + + out$ic <- int$costs - comp[comparator,]$costs + out$iq <- int$qalys - comp[comparator,]$qalys + out$il <- int$ly - comp[comparator,]$ly + out$icer <- out$ic / out$iq + + data.table(t(unlist(out))) + + }) + ) + + ICERs <- rbind( + data.table(t(structure(rep(NA,4),.Names=colnames(ICERs)))), + ICERs + ) + + results_table <- cbind(tab,ICERs) + results_table$L1 <- lookup[match(results_table$L1, lookup$Number),]$Description + + return(results_table) +} + + +#' Function to route to lower level functions per model structure in order to +#' process patient flow (i.e., pf) and return results. +#' +#' @param pf patient flow object resulting from `f_pf_computePF` +#' @param structure `State transition` or `Partitioned survival` +#' @param p the full p list for this model run +#' @param detail_level 1 is top line tables only, 2 is +#' +f_res_compute_results <- function(pf, structure, p, detail_level = 1,vs_mol=1, no_active_lines = 4) { + stopifnot(structure %in% c("State transition","Partitioned survival")) + if (structure == "State transition") { + f_res_compute_results_mk(pf,p,detail_level,vs_mol, no_active_lines) + } else { + f_res_compute_results_ps(pf,p,detail_level) + } +} + + +#' Results summarizer for the partitioned survival model +#' +#' @param pf patient flow object for markov model +#' @param p full p list for this run +#' @param detail level 1-5 +#' +f_res_compute_results_ps <- function(pf,p,detail_level = 1) { + + stopifnot(detail_level %in% 1:5) + out <- list() + + out$tables <- f_res_summary_ps( + pf_ps = pf, + lookups = p$basic$lookup, + top_line = FALSE + ) + + if(detail_level == 1) return(out$tables$top_line) + + pop_labs <- structure(paste0("pop_",1:length(out$tables$top_line)),.Names=paste0("pop_",1:length(out$tables$top_line))) + + out$incremental <- lapply(pop_labs, function(popu) { + dat <- out$tables$top_line[[popu]] + f_res_mk_incremental( + res_d = dat, + res_ud = dat, + produce_plot = TRUE, + lu_mol = p$basic$lookup$ipd$mol, + output_weighted = "Yes" + ) + }) + + + if(detail_level == 2) return(list( + summary = out$tables$top_line, + incremental = lapply(out$incremental, function(x) { + if(!"non_dominated" %in% names(x)) { + x + } else { + x$non_dominated + } + }) + )) + + if(detail_level >= 3) { + out$ly <- lapply(pf, function(popu) { + do.call(rbind, + lapply(popu, function(L1_treatment) { + sapply(L1_treatment$lys, sum) + })) + + }) + out$disc_qaly <- lapply(pf, function(popu) { + do.call(rbind, + lapply(popu, function(L1_treatment) { + sapply(L1_treatment$qalys$disc, sum) + })) + + }) + out$disc_cost <- lapply(pf, function(popu) { + do.call(rbind, + lapply(popu, function(L1_treatment) { + sapply(L1_treatment$costs$disc, sum) + })) + + }) + } + + # Sample code is provided below which allows the production of PartSA outputs to view as plots or in the console + # Highest detail is plots: + if(detail_level > 4){ + out$sr_plots <- lapply(pf, function(popu){ + lapply(popu, function(L1_trt) { + L1_trt$sr_plot + }) + }) + } + + return(out) + +} + + + + + + + +#' Results summarizer for the state transition model +#' +#' @param pf patient flow object for markov model +#' @param p full p list for this run +#' @param detail level 1-5 +#' @param vs_mol decision problem molecule for pairwise ICERs +#' +f_res_compute_results_mk <- function(pf,p,detail_level = 1, vs_mol = 1, no_active_lines) { + + stopifnot(detail_level %in% 1:5) + + out <- list() + + # In any case, we need to run the main results calculator by treatment sequence + out$undisc = f_pf_mk_summary( + pf_list = pf, + disc_undisc = "undisc", + lookups = p$basic$lookup, + full_breakdown = TRUE, + breakdown = TRUE, + ypc = p$basic$cl_y + ) + out$disc = f_pf_mk_summary( + pf_list = pf, + disc_undisc = "disc", + lookups = p$basic$lookup, + full_breakdown = TRUE, + breakdown = TRUE, + ypc = p$basic$cl_y + ) + + # If detail level is 4 or above return per sequence results. per sequence + # incrementals aren't needed for weighting analysis so optional here for higher + # detail levels + if (detail_level >= 4) { + out$incremental <- lapply(structure(names(out$disc),.Names=names(out$disc)),function(popu) { + f_res_mk_incremental( + res_d = out$disc[[popu]]$res, + res_ud = out$undisc[[popu]]$res, + produce_plot = detail_level > 4, + no_active_lines = no_active_lines, + output_weighted = "No") + }) + } + + pop_index <- structure(1:length(out$undisc), .Names = paste0("pop_", 1:length(out$undisc))) + + # compute weighted models - we need to do this for ALL levels of detail + out$weighted_model_disc <- lapply(pop_index, function(overall_pop) { + f_res_wa_model( + res_obj = out, + pop_oo = overall_pop, + subs = p$costs$settings$subsTx, + ptchar = p$demo$table, + lookups = p$basic$lookup, + disc = TRUE, + no_active_lines = no_active_lines, + max_lines = p$basic$R_maxlines+1 + ) + }) + out$weighted_model_undisc <- lapply(pop_index, function(overall_pop) { + f_res_wa_model( + res_obj = out, + pop_oo = overall_pop, + subs = p$costs$settings$subsTx, + ptchar = i$R_table_ptchar, + lookups = p$basic$lookup, + disc = FALSE, + no_active_lines = no_active_lines, + max_lines = p$basic$R_maxlines+1 + ) + }) + + # only for full detail do we want the trace plots: + if (detail_level > 4) { + out$weighted_trace_plots <- lapply(pop_index, function(pop_n) { + f_pf_wa_sr_plots( + pf_list_pop = pf[[pop_n]], + oo_pop_n = pop_n, + subs = p$costs$settings$subsTx, + lookups = p$basic$lookup, + max_lines = p$basic$R_maxlines+1, + no_active_lines = no_active_lines + ) + }) + } + + # the top line result is this - the weighted average results + out$wa_summarised <- lapply(pop_index, function(popu) { + pop_txt <- names(pop_index)[popu] + f_res_sum_weighted_model( + rd = out$weighted_model_disc[[pop_txt]], + rud = out$weighted_model_undisc[[pop_txt]] + ) + }) + + # If it's detail level 1, JUST give me these tables + if (detail_level == 1) return(out$wa_summarised) + + + out$weighted_incremental <- lapply(out$wa_summarised, function(popu) { + popu <- popu[order(popu$costs),] + f_res_mk_incremental( + res_d = popu, + res_ud = popu, + produce_plot = detail_level > 4, + lu_mol = p$basic$lookup$ipd$mol, + no_active_lines = no_active_lines, + output_weighted = "Yes" + ) + }) + + + # Detail level 2 is just top line table and non-dominated incremental results: + if (detail_level == 2) { + return(list( + summary = out$wa_summarised, + incremental = lapply(out$weighted_incremental, function(x) x$non_dominated) + )) + } + + if(detail_level >= 3) { + out$pairwise_vs_mol <- lapply(out$wa_summarised[1:3], function(oo_pop) { + f_res_ICER_pairwiseVsoneTrt(oo_pop,vs_mol,p$basic$lookup$ipd$mol) + }) + } + + + # For the higher detail levels we already have breakdown tables and per-sequence + # results done, depending on detail_level + return(out) +} + + +#' function to create LY and QALY output breakdowns for automated reporting +#' +#' @param tab `cabo_nivo_outcomes` for the weighted population +#' @param tab `comparator_outcomes` for the weighted population - this should use the same comparator as defined in comparator_no +#' @param comparator_no the treatment to treat as the "comparator" to produce the results breakdown - this should be the closest comparator on the incremental analysis +#' @param LYorQALY whether to report LYs or QALYs + + +ff_report_outcomes_breakdown <- + function(cabo_nivo_outcomes, + comparator_outcomes, + comparator_no, + LYorQALY) { + + stopifnot(LYorQALY %in% c("LY","QALY")) + + # Make a character vector for the names to assign depending on settings: + if (LYorQALY == "LY") { + nam_dat <- gsub("ly_","",names(cabo_nivo_outcomes)) + } else { + nam_dat <- gsub("qaly_","",names(cabo_nivo_outcomes)) + } + + # Put suffix expansion: + nam_dat <- gsub("_on",": on treatment",nam_dat) + nam_dat <- gsub("_off",": off treatment",nam_dat) + + # Note: column names can't start with a number in R so we're doing that after + + names(cabo_nivo_outcomes) <- nam_dat + names(comparator_outcomes) <- nam_dat + + ft_outcomes_breakdown <- + data.table( + Health_state = nam_dat, + cabo_nivo = as.numeric(cabo_nivo_outcomes), + comp = as.numeric(comparator_outcomes) + ) + + + ft_outcomes_breakdown <- ft_outcomes_breakdown[order(Health_state),] + ft_outcomes_breakdown <- ft_outcomes_breakdown[c(2:nrow(ft_outcomes_breakdown),1),] + ft_outcomes_breakdown$Health_state <- gsub("L","",ft_outcomes_breakdown$Health_state) + ft_outcomes_breakdown$Health_state <- sub("(.*)(\\d)", "\\1\\2L", ft_outcomes_breakdown$Health_state) + + + ft_outcomes_breakdown$inc <- ft_outcomes_breakdown$cabo_nivo - ft_outcomes_breakdown$comp + ft_outcomes_breakdown$absinc <- abs(ft_outcomes_breakdown$inc) + ft_outcomes_breakdown$percentabs <- ft_outcomes_breakdown$absinc / sum(ft_outcomes_breakdown$absinc) * 100 + + # Add in death row and totals row: + ft_outcomes_breakdown <- rbindlist(list( + ft_outcomes_breakdown, + lapply(1:ncol(ft_outcomes_breakdown), function(x) { + if (x == 1) { + "Death" + } else { + 0 + } + }), + lapply(1:ncol(ft_outcomes_breakdown), function(x) { + if (x == 1) { + "Total" + } else { + sum(ft_outcomes_breakdown[,x,with = FALSE]) + } + }) + + )) + + ft_outcomes_breakdown <- flextable(ft_outcomes_breakdown) + + if (LYorQALY == "LY") { + ft_outcomes_tab <- ff_LY_table(ft_outcomes_breakdown, comparator_no) + } else { + ft_outcomes_tab <- + ff_QALY_table(ft_outcomes_breakdown, comparator_no) + } + + return(ft_outcomes_tab) + + } + +#' function to create LY output breakdown for automated reporting +#' +#' @param tab `ft_LY_breakdown` for the weighted population based upon output of ff_report_outcomes_breakdown +#' @param comparator_no the treatment to treat as the "comparator" to produce the results breakdown - this should be the closest comparator on the incremental analysis + + +ff_LY_table <- function(ft_LY_breakdown, comparator_no) { + + ft_LY_tab <- ft_LY_breakdown %>% + theme_box() |> + set_header_labels( + values = list( + Health_state = "Health state", + cabo_nivo = "LY Cabozantinib plus nivolumab (X)", + comp = paste0("LY ", p$basic$lookup$ipd$mol[Number == comparator_no]$Description, " (Y)"), + inc = "Increment", + absinc = "Absolute increment", + percentabs = "% absolute increment" + ) + ) %>% + flextable::colformat_double(j = c(6), digits = 0, suffix = "%") %>% + flextable::colformat_double(j = c(2:5), digits = 3) %>% + add_footer_lines("Abbreviations: 1L, 1st line; 2L, 2nd line; 3L, 3rd line; 4L, 4th line; BSC, best supportive care; LY, life years; vs, versus") %>% + add_footer_lines("Discrepancies in sums due to rounding errors: totals shown are calculated on unrounded numbers") %>% + # add_header_row(colwidths = c(1,1, 2),values = c("","g1", "g2")) |> + bold(bold = TRUE, part = "header") %>% + fontsize(i = NULL, size = 10, part = c("header")) %>% + fontsize(i = NULL, size = 10, part = c("body")) %>% + fontsize(i = NULL, size = 9, part = c("footer")) %>% + style(i=11, pr_t = fp_text_default(bold = TRUE)) %>% + autofit() %>% + set_table_properties(layout = "autofit") + + + return(ft_LY_tab) + +} + +#' function to create QALY output breakdown for automated reporting +#' +#' @param tab `ft_QALY_breakdown` for the weighted population based upon output of ff_report_outcomes_breakdown +#' @param comparator_no the treatment to treat as the "comparator" to produce the results breakdown - this should be the closest comparator on the incremental analysis + + +ff_QALY_table <- function(ft_QALY_breakdown, comparator_no) { + ft_QALY_tab <- ft_QALY_breakdown %>% + theme_box() |> + set_header_labels( + values = list( + Health_state = "Health state", + cabo_nivo = "QALY Cabozantinib plus nivolumab (X)", + comp = paste0("QALY ", p$basic$lookup$ipd$mol[Number == comparator_no]$Description, " (Y)"), + inc = "Increment", + absinc = "Absolute increment", + percentabs = "% absolute increment" + ) + ) %>% + flextable::colformat_double(j = c(6), + digits = 0, + suffix = "%") %>% + flextable::colformat_double(j = c(2:5), digits = 3) %>% + add_footer_lines( + "Abbreviations: 1L, 1st line; 2L, 2nd line; 3L, 3rd line; 4L, 4th line; BSC, best supportive care; LY, life years; vs, versus" + ) %>% + add_footer_lines( + "Discrepancies in sums due to rounding errors: totals shown are calculated on unrounded numbers" + ) %>% + # add_header_row(colwidths = c(1,1, 2),values = c("","g1", "g2")) |> + bold(bold = TRUE, part = "header") %>% + fontsize(i = NULL, + size = 10, + part = c("header")) %>% + fontsize(i = NULL, + size = 10, + part = c("body")) %>% + fontsize(i = NULL, + size = 9, + part = c("footer")) %>% + style(i=11, pr_t = fp_text_default(bold = TRUE)) %>% + autofit() %>% + set_table_properties(layout = "autofit") + + + return(ft_QALY_tab) + +} + + +#' function to create cost breakdown for automated reporting +#' +#' @param tab `disc_results` discounted results from the res file for the weighted population +#' @param trt_no treatment number to produce the table for +#' @param pop population to produce the table for +#' @param cost_type list of the cost types to include + +ff_cost_table <- function(disc_results, trt_no, pop, cost_type = c("drug" , "admin" , "mru" , "eol" , "ae_cost")) { + + cost_inputs <- disc_results[[pop]][L1 == trt_no,] %>% dplyr::select(contains(cost_type)) + + subs_drug <- sum(cost_inputs %>% dplyr::select(starts_with("drug"))) - cost_inputs$drug_L1 + subs_admin <- sum(cost_inputs %>% dplyr::select(starts_with("admin"))) - cost_inputs$admin_L1 + subs_ae <- sum(cost_inputs %>% dplyr::select(starts_with("ae_cost"))) - cost_inputs$ae_cost_L1 + mru_1L <- cost_inputs$mru_on_L1 + cost_inputs$mru_off_L1 + subs_mru <- sum(cost_inputs %>% select(starts_with("mru"))) - mru_1L + + return(data.table(Population = p$basic$lookup$pop_map[Overall.population.number == as.numeric(gsub("\\D", "", pop))]$Risk.population, + Treatment = p$basic$lookup$ipd$mol[Number == trt_no]$Description, + L1_drug = cost_inputs$drug_L1, + L1_admin = cost_inputs$admin_L1, + L1_ae = cost_inputs$ae_cost_L1, + subs_drug = subs_drug, + subs_admin = subs_admin, + subs_ae = subs_ae, + mru_1L = mru_1L, + subs_mru = subs_mru, + eol_cost = cost_inputs$eol_L5, + Total = sum(cost_inputs))) +} + +#' function to create cost breakdown for automated reporting - combined results by risk status +#' +#' @param tab `cost_breakdown_2` cost breakdown from combined output of ff_cost_table +#' @param comparator_no the treatment to treat as the "comparator" to produce the results breakdown - this should be the closest comparator on the incremental analysis + +ff_cost_byrisk_table <- function(cost_breakdown_2, comparator_no) { + + ft_cost2_tab <- flextable(cost_breakdown_2) %>% + theme_box() |> + set_header_labels( + values = list( + Type = "Item", + Int = "Cost Cabozantinib plus nivolumab (X)", + Comp = paste0("Cost ", p$basic$lookup$ipd$mol[Number == comparator_no]$Description, " (Y)"), + Inc = "Increment", + abs = "Absolute increment", + abspercent = "% absolute increment" + ) + ) %>% + flextable::colformat_double(j = c(6), digits = 0, suffix = "%") %>% + flextable::colformat_double(j = c(2:5), digits = 0, prefix = "£") %>% + add_footer_lines("Abbreviations: 1L, 1st line; 2L, 2nd line; 2L+, 2nd line-plus; admin, administration; AE, adverse event; EOL, end of life; MRU, medical resource use") %>% + add_footer_lines("Discrepancies in sums due to rounding errors: totals shown are calculated on unrounded numbers") %>% + # add_header_row(colwidths = c(1,1, 2),values = c("","g1", "g2")) |> + bold(bold = TRUE, part = "header") %>% + fontsize(i = NULL, size = 10, part = c("header")) %>% + fontsize(i = NULL, size = 10, part = c("body")) %>% + fontsize(i = NULL, size = 9, part = c("footer")) %>% + style(i=10, pr_t = fp_text_default(bold = TRUE)) %>% + autofit() %>% + set_table_properties(layout = "autofit") + + + return(ft_cost2_tab) + +} + +#' function to create scenario analysis format table for easy copy pasting +#' +#' @param tab `Scenario_table` table of scenario analysis results + + +ff_scenario_table <- function(Scenario_table) { + + ft_scenario_tab <- flextable(Scenario_table) %>% + theme_box() |> + set_header_labels( + values = list( + Scenario_name = "Scenario", + next_best = "Next best comparator*", + ic = "Incremental costs", + iq = "Incremental QALYs", + ICER = "ICER (£/QALY)" + ) + ) %>% + flextable::colformat_double(j = c(3,5), digits = 0, prefix = "£") %>% + flextable::colformat_double(j = c(4), digits = 3) %>% + add_footer_lines("Abbreviations: 1L, 1st line; 2L, 2nd line; 3L, 3rd line; 4L, 4th line; AEs, adverse events; AUC, area under the curve; axi, axitinib; BSC, best supportive care; evero, everolimus; FP, fractional polynomial; ICER, incremental cost-effectiveness ratio; IO, immune-oncology; IV, intravenous; KM, Kaplan-Meier; OS, overall survival; PD, progressed disease; PFS, progression free survival; PH, proportional hazards; PPS, post-progression survival; QALYs, quality adjusted life years; RDI, relative dosing intensity; RWE, real world evidence; tivo, tivozanib; TKI, tyrosine kinase inhibitor; TTD, time to discontinuation; TTP, time to progression ") %>% + add_footer_lines("*Next best comparator defined as next most efficient non-dominated comparator.") %>% + # add_header_row(colwidths = c(1,1, 2),values = c("","g1", "g2")) |> + bold(bold = TRUE, part = "header") %>% + fontsize(i = NULL, size = 10, part = c("header")) %>% + fontsize(i = NULL, size = 10, part = c("body")) %>% + fontsize(i = NULL, size = 9, part = c("footer")) %>% + autofit() %>% + set_table_properties(layout = "autofit") + + + return(ft_scenario_tab) + +} + +ff_scenario_pairwise_table <- function(ft_all_pairwise, Word_width_inches) { + + ft_scenario_tab <- ft_all_pairwise %>% + rename(`Risk population` = risk_pop) %>% + as_grouped_data(groups = "Risk population") %>% + as_flextable() %>% + width(., width = (Word_width_inches/(ncol(ft_all_pairwise)))) %>% + theme_box() |> + set_header_labels( + values = list( + L1 = "Technologies", + costs = "Costs (£)", + qalys = "QALYs", + ly = "LYG", + ic = "Inc. Costs", + iq = "Inc. QALYs", + il = "Inc. LYG", + ICER = "ICER cabo + nivo vs comparator" + ) + ) %>% + flextable::colformat_double(j=c(2,5), digits = 0, prefix = "£") %>% + flextable::colformat_double(j=c(3,4,6,7), digits = 2) %>% + add_footer_lines("Abbreviations: ICER, incremental cost-effectiveness ratio; inc. incremental; LYG, life-years gained; QALY, quality-adjusted life-year") %>% + # add_header_row(colwidths = c(1,1, 2),values = c("","g1", "g2")) |> + bold( bold = TRUE, part="header") %>% + fontsize(i = NULL, size = 10, part = c("header")) %>% + fontsize(i = NULL, size = 10, part = c("body")) %>% + fontsize(i = NULL, size = 9, part = c("footer")) %>% + align(i = ~ !is.na(`Risk population`), align = "left") %>% + bold(i = ~ !is.na(`Risk population`)) %>% + autofit() %>% + set_table_properties(layout = "autofit") + + + return(ft_scenario_tab) + +} + + +#' function to identify the closest comparator for a population +#' +#' @param `res` results file containing the weighted incremental analysis +#' @param `pop` population to run the function for - needs to be input in format for overall population e.g. "pop_1" + +ff_closest_comparator <- function(res,pop){ + + if ( "Cabozantinib plus nivolumab" %in% res$weighted_incremental[[pop]]$L1) { + + # if cabo plus nivo is dominant then + # take the treatment with the highest QALYs + QALYs <- res$weighted_model_disc[[pop]] %>% + select(starts_with("qaly")) + total_QALYs <- rowSums(QALYs,na.rm = TRUE) + names(total_QALYs) <- res$weighted_model_disc[[pop]]$L1 + n <- length(total_QALYs) + second_QALY <- sort(total_QALYs,partial=n-1)[n-1] + + comparator_name <- names(total_QALYs[match(second_QALY, total_QALYs)]) + + comparator_no <- p$basic$lookup$ipd$mol[Description == comparator_name_lookup]$Number + + } else if( "Cabozantinib plus nivolumab" %in% res$weighted_incremental[[pop]]$non_dominated$L1) { + + # if cabo plus nivo appears in the incremental results non dominated list + # take the treatment it appears after in the list as the most relevant comparator + + position_cabonivo <- which("Cabozantinib plus nivolumab" == res$weighted_incremental[[pop]]$non_dominated$L1)[[1]] + + if (position_cabonivo == 1) { + + comparator_name_lookup <- res$weighted_incremental[[pop]]$non_dominated$L1[position_cabonivo+1] + + } else { + + comparator_name_lookup <- res$weighted_incremental[[pop]]$non_dominated$L1[position_cabonivo-1] + } + + comparator_no <- p$basic$lookup$ipd$mol[Description == comparator_name_lookup]$Number + + } else { + + # take the most effective treatment in the non dominated list + + position_comparator <- length(res$weighted_incremental[[pop]]$non_dominated$L1) + comparator_name_lookup <- res$weighted_incremental[[pop]]$non_dominated$L1[position_comparator] + comparator_no <- p$basic$lookup$ipd$mol[Description == comparator_name_lookup]$Number + + } + + return(comparator_no) + +} + +ff_closest_comparator_PartSA <- function(res,pop){ + + if ( "Cabozantinib plus nivolumab" %in% res$incremental[[pop]]$L1) { + + # if cabo plus nivo is dominant + # take the treatment with the highest QALYs + QALYs <- as.data.table(res$disc_qaly[[pop]]) + total_QALYs <- rowSums(QALYs,na.rm = TRUE) + names(total_QALYs) <- rownames(res$disc_qaly[[pop]]) + n <- length(total_QALYs) + second_QALY <- sort(total_QALYs,partial=n-1)[n-1] + + comparator_name <- names(total_QALYs[match(second_QALY, total_QALYs)]) + + comparator_no <- p$basic$lookup$ipd$mol[RCC_input_desc == comparator_name ]$Number + + } else if( "Cabozantinib plus nivolumab" %in% res$incremental[[pop]]$non_dominated$L1) { + + # if cabo plus nivo appears in the incremental results non dominated list + # take the treatment it appears after in the list as the most relevant comparator + + position_cabonivo <- which("Cabozantinib plus nivolumab" == res$incremental[[pop]]$non_dominated$L1)[[1]] + + if (position_cabonivo == 1) { + + comparator_name_lookup <- res$incremental[[pop]]$non_dominated$L1[position_cabonivo+1] + + } else { + + comparator_name_lookup <- res$incremental[[pop]]$non_dominated$L1[position_cabonivo-1] + + } + + comparator_no <- p$basic$lookup$ipd$mol[Description == comparator_name_lookup]$Number + + } else { + + # take the most effective treatment in the non dominated list + + position_comparator <- length(res$incremental[[pop]]$non_dominated$L1) + comparator_name_lookup <- res$incremental[[pop]]$non_dominated$L1[position_comparator] + comparator_no <- p$basic$lookup$ipd$mol[Description == comparator_name_lookup]$Number + + } + + return(comparator_no) + +} + +f_res_cabonivo_SevMod <- function(res, oo_pop_string, pop_n, comp_numb) { + # note that in calc_severity_modifier, the argument sex is the proportion male + + if (i$dd_age_sex_source == "Mean") { + + # So for this risk population, we need the baseline characteristics: + bl_chars <- i$R_table_ptchar[Population == oo_pop_string & Treatment.line == 1,] + bl_age <- bl_chars$Starting.age..years..Mean + bl_male <- 1-bl_chars$Starting...female.Mean + + } else { + + patient_sex_age_IPD <- as.data.table(i$R_table_patientagesex) + patient_sex_age_IPD$Gender <- replace(patient_sex_age_IPD$Gender, patient_sex_age_IPD$Gender=="M","male") + patient_sex_age_IPD$Gender <- replace(patient_sex_age_IPD$Gender, patient_sex_age_IPD$Gender=="F","female") + + bl_age <- patient_sex_age_IPD[Line ==1]$Age + bl_male <- patient_sex_age_IPD[Line ==1]$Gender + + } + + pna_txt <- names(res$wa_summarised)[pop_n] + + tab <- res$wa_summarised[[pna_txt]][L1 != 1,] + + met <- tab[L1 == comp_numb] + + q_met <- met$qalys + comp_no_met <- met$L1 + + out <- calc_severity_modifier( + age = bl_age, + sex = bl_male, + .patient_level = if(i$dd_age_sex_source == "Mean") {FALSE} else {TRUE}, + qalys = q_met, + .i = i, + .p = p + ) + + out <- cbind(risk_pop = p$basic$lookup$pop_map$Risk.population[pop_n], out, SOC = comp_numb) + +return(out) + +} + +ff_severity_table <- function(severity_table) { + ft_severity_tab <- flextable(severity_table) %>% + theme_box() |> + set_header_labels( + values = list( + risk_pop = "Risk group", + qaly_soc = "SOC QALYs", + qaly_gpop = "Gen pop QALYs", + abs_sf = "Abs SF", + prop_sf= "Prop SF", + modifier = "Modifier", + SOC = "Treatment considered SOC" + ) + ) %>% + flextable::colformat_double(j = c(6), digits = 1) %>% + flextable::colformat_double(j = c(2:5), digits = 3) %>% + flextable::colformat_char(j = c(1:7)) %>% + add_footer_lines( + "Abbreviations: Abs, absolute; Fav, favourable; Gen, general; Int, intermediate; pop, population; Prop, proportional; QALYs, quality adjusted life years; SF, shortfall; SOC, standard of care" + ) %>% + # add_header_row(colwidths = c(1,1, 2),values = c("","g1", "g2")) |> + bold(bold = TRUE, part = "header") %>% + fontsize(i = NULL, + size = 10, + part = c("header")) %>% + fontsize(i = NULL, + size = 10, + part = c("body")) %>% + fontsize(i = NULL, + size = 9, + part = c("footer")) %>% + autofit() %>% + set_table_properties(layout = "autofit") + + return(ft_severity_tab) + +} + +ff_PartSALY_table <- function(PartSA_Lys) { + + ft_PartSALY_tab <- PartSA_Lys %>% + rename(`Risk population` = risk_pop) %>% + as_grouped_data(groups = "Risk population") %>% + as_flextable() %>% + width(., width = (Word_width_inches/(ncol(PartSA_Lys)))) %>% + theme_box() |> + set_header_labels( + values = list( + L1 = "Technologies", + PFS_on = "PFS on treatment", + PFS_off = "PFS off treatment", + PPS_on = "PPS on treatment", + PPS_off = "PPS off treatment", + Total = "Total" + ) + ) %>% + flextable::colformat_double(j=c(2:6), digits = 2) %>% + add_footer_lines("Abbreviations: LYG, life years gained; PartSA, partitioned survival analysis; PFS, progression free survival; PPS, post progression survival") %>% + # add_header_row(colwidths = c(1,1, 2),values = c("","g1", "g2")) |> + bold( bold = TRUE, part="header") %>% + fontsize(i = NULL, size = 10, part = c("header")) %>% + fontsize(i = NULL, size = 10, part = c("body")) %>% + fontsize(i = NULL, size = 9, part = c("footer")) %>% + align(i = ~ !is.na(`Risk population`), align = "left") %>% + bold(i = ~ !is.na(`Risk population`)) %>% + autofit() %>% + set_table_properties(layout = "autofit") + + + return(ft_PartSALY_tab) + +} + +ff_PartSAQALY_table <- function(PartSA_QALYs) { + + ft_PartSAQALY_tab <- PartSA_QALYs %>% + rename(`Risk population` = risk_pop) %>% + as_grouped_data(groups = "Risk population") %>% + as_flextable() %>% + width(., width = (Word_width_inches/(ncol(PartSA_QALYs)))) %>% + theme_box() |> + set_header_labels( + values = list( + L1 = "Technologies", + PFS = "PFS", + PPS = "PPS", + AE = "1L AEs", + AE_PPS = "AEs PPS", + Total = "Total" + ) + ) %>% + flextable::colformat_double(j=c(2:6), digits = 2) %>% + add_footer_lines("Abbreviations: AE, adverse event; PartSA, partitioned survival analysis; PFS, progression free survival; PPS, post progression survival; QALYs, quality adjusted life years") %>% + # add_header_row(colwidths = c(1,1, 2),values = c("","g1", "g2")) |> + bold( bold = TRUE, part="header") %>% + fontsize(i = NULL, size = 10, part = c("header")) %>% + fontsize(i = NULL, size = 10, part = c("body")) %>% + fontsize(i = NULL, size = 9, part = c("footer")) %>% + align(i = ~ !is.na(`Risk population`), align = "left") %>% + bold(i = ~ !is.na(`Risk population`)) %>% + autofit() %>% + set_table_properties(layout = "autofit") + + + return(ft_PartSAQALY_tab) + +} + +ff_PartSAcost_table <- function(PartSA_costs) { + + ft_PartSAcost_tab <- PartSA_costs %>% + rename(`Risk population` = risk_pop) %>% + as_grouped_data(groups = "Risk population") %>% + as_flextable() %>% + width(., width = (Word_width_inches/(ncol(PartSA_costs)))) %>% + add_header_row(top = TRUE, values = c("","1L costs", "Subsequent treatment", "MRU","","",""), colwidths = c(1,3,3,2,1,1,1)) %>% + theme_box() |> + set_header_labels( + values = list( + L1 = "Technologies", + drug = "Drug cost", + admin = "Admin cost", + AE = "AE cost", + substrt_drug_cost = "Drug cost", + substrt_admin_cost = "Admin cost", + substrt_AE_cost = "AE cost", + mru_preprog = "Pre-progression cost", + mru_postprog = "Post-progression cost", + EOL_cost = "EOL cost", + prog_cost = "On progression cost", + Total = "Total" + ) + ) %>% + flextable::colformat_double(j=c(2:12), digits = 0, prefix = "£") %>% + add_footer_lines("Abbreviations: admin, administration; AE, adverse event; EOL, end of life; MRU, medical resource use; PartSA, partitioned survival analysis") %>% + # add_header_row(colwidths = c(1,1, 2),values = c("","g1", "g2")) |> + bold( bold = TRUE, part="header") %>% + fontsize(i = NULL, size = 10, part = c("header")) %>% + fontsize(i = NULL, size = 10, part = c("body")) %>% + fontsize(i = NULL, size = 9, part = c("footer")) %>% + align(i = ~ !is.na(`Risk population`), align = "left") %>% + bold(i = ~ !is.na(`Risk population`)) %>% + align(i = 1, j = NULL, align = "center", part = "header") %>% + autofit() %>% + set_table_properties(layout = "autofit") + + + return(ft_PartSAcost_tab) + +} + +ff_PartSAresults_table <- function(PartSA_results) { + + ft_PartSA_results_tab <- PartSA_results %>% + rename(`Risk population` = risk_pop) %>% + as_grouped_data(groups = "Risk population") %>% + as_flextable() %>% + width(., width = (Word_width_inches/(ncol(PartSA_results)))) %>% + add_header_row(top = TRUE, values = c("","Total", "Incremental"), colwidths = c(1,3,5)) %>% + theme_box() |> + set_header_labels( + values = list( + L1 = "Technologies", + costs = "Costs", + lys = "LYs", + qalys = "QALYs", + ic = "Costs", + il = "LYG", + iq = "QALYs", + Pairwise_ICER = "ICER cabo + nivo vs comparator (£/QALY)", + ICER = "ICER incremental (£/QALY)" + ) + ) %>% + flextable::colformat_double(j=c(3,4,6,7), digits = 2, na_str = "") %>% + flextable::colformat_double(j=c(2,5), digits = 0, prefix = "£", na_str = "") %>% + add_footer_lines("Abbreviations: ext, extended; ICER, incremental cost-effectiveness ratio; LYG, life year gained; PartSA, partitioned survival analysis; QALY, quality adjusted life year; SW, south west; vs, versus") %>% + # add_header_row(colwidths = c(1,1, 2),values = c("","g1", "g2")) |> + bold( bold = TRUE, part="header") %>% + fontsize(i = NULL, size = 10, part = c("header")) %>% + fontsize(i = NULL, size = 10, part = c("body")) %>% + fontsize(i = NULL, size = 9, part = c("footer")) %>% + align(i = ~ !is.na(`Risk population`), align = "left") %>% + bold(i = ~ !is.na(`Risk population`)) %>% + autofit() %>% + set_table_properties(layout = "autofit") + + + return(ft_PartSA_results_tab) + +} + +ff_scenario_output <- function(res, Scenario_name, closest_comparator, pop, structure) { + + if (structure=="Partitioned survival") {location = "incremental"} else {location = "weighted_incremental"} + + Scenario_table <- data.table() + + Scenario_table$Scenario <- Scenario_name + + Scenario_table$next_best <- p$basic$lookup$ipd$mol[Number == closest_comparator]$Description + + if(sum(str_detect(res[[location]][[pop]]$non_dominated$L1,"Cabozantinib plus nivolumab"))>0) { + + position_cabonivo <- which("Cabozantinib plus nivolumab" == res[[location]][[pop]]$non_dominated$L1)[[1]] + + if(nrow(res[[location]][[pop]]$non_dominated) == 1) { + + Scenario_table$ic <- 0 + Scenario_table$iq <- 0 + Scenario_table$ICER <- "Cabo+nivo dominant vs all" + + } else if (position_cabonivo == 1){ + + Scenario_table$ic <- res[[location]][[pop]]$non_dominated[2]$ic + Scenario_table$iq <- res[[location]][[pop]]$non_dominated[2]$iq + Scenario_table$ICER <- paste0("£",round(res[[location]][[pop]]$non_dominated[2]$ICER,0)," SW quadrant comp vs cabo+nivo") + + } else { + + Scenario_table$ic <- res[[location]][[pop]]$non_dominated[L1 == "Cabozantinib plus nivolumab"]$ic + Scenario_table$iq <- res[[location]][[pop]]$non_dominated[L1 == "Cabozantinib plus nivolumab"]$iq + Scenario_table$ICER <- res[[location]][[pop]]$non_dominated[L1 == "Cabozantinib plus nivolumab"]$ICER + } + } else if ("Cabozantinib plus nivolumab" %in% res[[location]][[pop]]$L1) { + + Scenario_table$ic <- 0 + Scenario_table$iq <- 0 + Scenario_table$ICER <- "Cabo+nivo dominant vs all" + + } else { + + Scenario_table$ic <- 0 + Scenario_table$iq <- 0 + Scenario_table$ICER <- "Cabo+nivo dominated" + + } + + return(Scenario_table) + +} \ No newline at end of file diff --git a/3_Functions/scenarios/time_horizon.R b/3_Functions/scenarios/time_horizon.R new file mode 100644 index 0000000..0c62d8f --- /dev/null +++ b/3_Functions/scenarios/time_horizon.R @@ -0,0 +1,25 @@ + +#' function to update i$surv$reg so that the extrapolations in there for +#' deterministic analysis are reduced down to the time horizon +#' +#' @param regs `i$surv$reg` from model, after loading in or running them +#' @param TH time horizon `+1` cycles +#' +f_scen_TH_impose_surv <- function(regs, TH) { + lapply(regs, function(popu) { + lapply(popu, function(li) { + lapply(li, function(mol) { + lapply(mol, function(tr) { + lapply(tr, function(endp) { + if (is.null(endp$st)) { + return(endp) + } else { + endp$st <- endp$st[1:TH,] + return(endp) + } + }) + }) + }) + }) + }) +} \ No newline at end of file diff --git a/3_Functions/sequencing/rccFunctions.R b/3_Functions/sequencing/rccFunctions.R new file mode 100644 index 0000000..3ff53dc --- /dev/null +++ b/3_Functions/sequencing/rccFunctions.R @@ -0,0 +1,482 @@ +# Functions for RCC model + +# created 08/02/2023 by Ed Wilson + +#' Function to generate treatment sequences through generating all permutations of comparators +#' +#' +#' @param comparators A vector of comparator names. Note that the +#' @param maxlines The maximum number of treatment lines (such that each row of permutations output has that many columns) +#' +#' +generate_sequences <- function(comparators, maxlines = 5) { + + strategies <- list() + + # The maximum number of living states is the number of lines (BSC is always the last line) + # The number of model health states is number of lines +2 to include death too + max_living_states <- maxlines + + # List out all the possible treatment pathways by line, entering blank values + # when looking at lines past the first. So for 4th line patients there's only their + # treatment plus BSC to follow. + # + # lapply makes the list per line, rbindlist binds data.tables efficiently + # + rbindlist(lapply(1:(maxlines - 1), function(line) { + + # For this line, the number of remaining living states is the difference between + # line and the max of living states: + living_states_left <- max_living_states-line + + # use gtools' function to efficiently generate all possible permutations + # of treatments given the possible comparators, given starting from this + # line of therapy (i.e. so that at later lines there are less columns.) + out <- data.table(permutations(n = length(comparators), r = living_states_left, v = comparators)) + + # Adding in BSC as the rightmost column (data.table naming convention is VN + # where N is number. take the number of rows, and if it's less than the max + # then add empty columns to ensure there are maxlines+1 columnns at the end + out[[paste0("V",dim(out)[2]+1)]] <- "BSC" + nco <- dim(out)[2] + + # Make sure the output always has columns equal to the number of treatment lines + if (nco < max_living_states) { + ncol_to_add <- max_living_states - nco + for (col_to_add in 1:ncol_to_add) { + out[[paste0("V",nco+col_to_add)]] <- "" + } + } + + return(out) + })) +} + + + + + + + +#' Utility function to apply a filter sequentially to selected columns of a data.table +#' based on a text string. There is a data.table way to do this using .SDCols but it's +#' hard to read, so this is more transparent. Assumes the columns are named V per +#' data.table defaults. +#' +#' @param dat_tab a `data.table` containing the sequenceis +#' @param lines numeric vector of the treatment lines to remove the treatment from +#' @param trt character entry for the treatment to remove from the designated lines +#' +f_path_RemoveFromLines <- function(dat_tab, lines, trt) { + if (length(lines) == 1) { + return(dat_tab[eval(as.name(paste0("V",lines))) != trt,]) + } else { + Reduce( + x = paste0("V",lines), + accumulate = FALSE, + init = dat_tab, + f = function(tab_last_round, this_col) { + return(tab_last_round[eval(as.name(this_col)) != trt,]) + } + ) + } + +} + + + + +#' Utiltiy function to apply the "only after" ruleset to a treatment using subset +#' filtering. +#' +#' Note that this is a reverse filter (note the ! in front of the conditions). Handy +#' for only returning those results which don't match the conditions. in this case +#' it's essentially like performing a filter for those conditions. +#' +#' It's very efficient and easy to read. +#' +#' @description At each line, the treatments in rule aren't allowed after trt. filter out. +#' +#' @param perms permutatins of possible treatments +#' @param trt the treatment to apply this filtering alg to +#' @param rule a character vector of those treatments that aren't allowed after trt +#' +f_path_notAllowedAfter <- function(perms, trt, rule) { + perms <- subset(perms,!(V1 == trt & (V2 %in% rule | V3 %in% rule | V4 %in% rule | V5 %in% rule))) + perms <- subset(perms,!(V2 == trt & (V3 %in% rule | V4 %in% rule | V5 %in% rule))) + perms <- subset(perms,!(V3 == trt & (V4 %in% rule | V5 %in% rule))) + perms <- subset(perms,!(V4 == trt & (V5 %in% rule))) + return(perms) +} + + +#' Function as above but the opposite - only allowing the rule treatments before +#' the given treatment (trt). Filters out starting from 2L as it's pointless to +#' filter first line based on previous treatments! +f_path_onlyAllowedAfter <- function(perms, trt, rule) { + perms <- subset(perms,!(V2 == trt & !V1 %in% rule)) + perms <- subset(perms,!(V3 == trt & (!V1 %in% rule & !V2 %in% rule))) + perms <- subset(perms,!(V4 == trt & (!V1 %in% rule & !V2 %in% rule & !V3 %in% rule))) + perms <- subset(perms,!(V5 == trt & (!V1 %in% rule & !V2 %in% rule & !V3 %in% rule & !V4 %in% rule))) + return(perms) +} + +#' another inversion of the above functions. this one doesn't allow the rule treatments +#' to come before trt, such that if you say no vegf treatment before trt it will +#' filter out prior vegf +f_path_notAllowedBefore <- function(perms, trt, rule) { + perms <- subset(perms,!(V2 == trt & V1 %in% rule)) + perms <- subset(perms,!(V3 == trt & (V1 %in% rule | V2 %in% rule))) + perms <- subset(perms,!(V4 == trt & (V1 %in% rule | V2 %in% rule | V3 %in% rule))) + perms <- subset(perms,!(V5 == trt & (V1 %in% rule | V2 %in% rule | V3 %in% rule | V4 %in% rule))) + return(perms) +} + + +# List of the treatments that other treatments are only allowed subsequently to +# take these from excel so they're always up to date! + +apply_tx_restrictions <- function(sequences) { + #Rules as per Dawn's spreadsheet "Treatment Sequences" + # move to 3_functions once written + + # 1. Ave_axi 1L tx with advanced RCC + for (i in 2:ncol(sequences)) { + sequences <- sequences[sequences[,i]!="ave_axi",] + } + + # 2. Axitinib only 2L+ and after failure of sunitinib or a cytokine + # remove first line axitinib + sequences <- sequences[sequences[,1]!="axitinib",] + onlyafters <- c("sunitinib") #note - what are the cytokine drugs? Need to add + + sequences[,ncol(sequences)+1] <- TRUE + for (i in 1:nrow(sequences)) { + #ignore sequences with no axitinib + if (!("axitinib" %in% sequences[i,])) next + #remove sequences with axitinib but no of the 'onlyafters' drugs + if (!(onlyafters %in% sequences[i,])) { + sequences[i,ncol(sequences)] <- FALSE + next + } + #remove sequences where onlyafters are before axitinib + for (j in 1:length(onlyafters)) { + if (match(onlyafters[j], sequences[i,]) > match("axitinib", sequences[i,])){ + sequences[i,ncol(sequences)] <- FALSE + } + } + } + sequences <- sequences[sequences[,ncol(sequences)],] #drop violating sequences + sequences[,ncol(sequences)] <- NULL + rm(onlyafters) + + # 3. Belzutifan only after PDI (ipi_nivo, nivo_cabo, nivolumab, pem_len) 1L + # and VEGF TKI (ave_axi, axitinib, cabozantinib, len_evro, nivo_cabo, pazopinib, pem_len, sunitinib, tivozanib) + onlyafters1 <- c("ipi_nivo", "nivo_cabo", "nivolumab", "pem_len") + onlyafters2 <- c("ave_axi", "axitinib", "cabozantinib", "len_evro", + "nivo_cabo", "pazopinib", "pem_len", "sunitinib", "tivozanib") + + #remove any 1L belzutifan + sequences <- sequences[sequences[,1]!="belzutifan",] + + sequences[,ncol(sequences)+1] <- TRUE + for (i in 1:nrow(sequences)) { + #ignore sequences with no belzutifan + if (!("belzutifan" %in% sequences[i,])) next + + #remove sequences without PD1/PD-L1 inhibitor + if (!(sequences[i,1] %in% onlyafters1)) { + sequences[i,ncol(sequences)] <- FALSE + next + } + + #remove sequences with belzutifan but no of the 'onlyafters' drugs + if (sum(onlyafters2 %in% sequences[i,])==0) { + sequences[i,ncol(sequences)] <- FALSE + next + } + + #remove sequences without VEGF TKI before belzutifan + if (sum(sequences[i,] %in% onlyafters2) > 0) { + if(match("belzutifan", sequences[i,]) < min(match(onlyafters2,sequences[i,], nomatch = 99))) { + sequences[i,ncol(sequences)] <- FALSE + next + } + } + } + sequences <- sequences[sequences[,ncol(sequences)],] #drop violating sequences + sequences[,ncol(sequences)] <- NULL + rm(onlyafters1, onlyafters2) + + # 4. cabozantinib monotherapy for advanced renal cell carcinoma + #- as first-line treatment of adult patients with intermediate or poor risk + #- in adults following prior vascular endothelial growth factor (VEGF)-targeted therapy " + onlyafters <- c("ave_axi", "axitinib", "cabozantinib", + "len_evro", "nivo_cabo", "pazopinib", + "pem_len", "sunitinib", "tivozanib") + sequences[,ncol(sequences)+1] <- TRUE + + for (i in 1:nrow(sequences)) { + #ignore sequences with no cabozantinib + if (!("cabozantinib" %in% sequences[i,])) next + + #keep sequences with cabozantinib first line + if (sequences[i,1] == "cabozantinib") next + + #remove sequences without VEGF + if (sum(sequences[i,] %in% onlyafters) == 0) { + sequences[i,ncol(sequences)] <- FALSE + next + } + + #remove sequences with cabozantinib before VEGF + if(match("cabozantinib", sequences[i,]) < min(match(onlyafters,sequences[i,], nomatch = 99))) { + sequences[i,ncol(sequences)] <- FALSE + print(i) + next + } + } + sequences <- sequences[sequences[,ncol(sequences)],] + sequences[,ncol(sequences)] <- NULL + + #Note - rule 4 doesn't drop any sequences - CHECK + + + # 5. Evrolimus only after VEGF-targeted therapy + onlyafters <- c("ave_axi", "axitinib", "cabozantinib", "len_evro", + "nivo_cabo", "pazopinib", "pem_len", "sunitinib", "tivozanib") + + #remove any 1L evrolimus + sequences <- sequences[sequences[,1]!="evrolimus",] + + sequences[,ncol(sequences)+1] <- TRUE + for (i in 1:nrow(sequences)) { + #ignore sequences with no evrolimus + if (!("evrolimus" %in% sequences[i,])) next + + #remove sequences with evrolimus but no of the 'onlyafters' drugs + if (sum(onlyafters %in% sequences[i,])==0) { + sequences[i,ncol(sequences)] <- FALSE + next + } + + #remove sequences with evrolimus before VEGF + if (sum(sequences[i,] %in% onlyafters) > 0) { + if(match("evrolimus", sequences[i,]) < min(match(onlyafters,sequences[i,], nomatch = 99))) { + sequences[i,ncol(sequences)] <- FALSE + next + } + } + } + sequences <- sequences[sequences[,ncol(sequences)],] #drop violating sequences + sequences[,ncol(sequences)] <- NULL + rm(onlyafters) + + # 6. ipi_nivo only first line + for (i in 2:ncol(sequences)) { + sequences <- sequences[!(sequences[,i] == "ipi_nivo"),] + } + + # 7. len_evro following 1 prior VEGF-targeted therapy + onlyafters <- c("ave_axi", "axitinib", "cabozantinib", "len_evro", + "nivo_cabo", "pazopinib", "pem_len", "sunitinib", "tivozanib") + + #remove any 1L len_evro + sequences <- sequences[sequences[,1]!="len_evro",] + + sequences[,ncol(sequences)+1] <- TRUE + for (i in 1:nrow(sequences)) { + #ignore sequences with no len_evro + if (!("len_evro" %in% sequences[i,])) next + + #remove sequences with len_evro but none of the 'onlyafters' drugs + if (sum(onlyafters %in% sequences[i,])==0) { + sequences[i,ncol(sequences)] <- FALSE + next + } + + #remove sequences with len_evro before VEGF and more than 1 VEGF + if (sum(sequences[i,] %in% onlyafters) > 0) { + #reject if len_evro is before a VEGF + if (match("len_evro", sequences[i,]) < min(match(onlyafters,sequences[i,], nomatch = 99))) { + sequences[i,ncol(sequences)] <- FALSE + } + #reject if nr VEGF therapies prior to len_evro > 1 + if (sum(match(onlyafters,sequences[i,], nomatch = 99) < match("len_evro", sequences[i,])) > 1) { + sequences[i,ncol(sequences)] <- FALSE + } + } + } + sequences <- sequences[sequences[,ncol(sequences)],] #drop violating sequences + sequences[,ncol(sequences)] <- NULL + rm(onlyafters) + + # 8. nivo_cabo first line only + for (i in 2:ncol(sequences)) { + sequences <- sequences[!(sequences[,i] == "nivo_cabo"),] + } + + # 9. nivolumab monotherapy only 2L+ + sequences <- sequences[!(sequences[,1] == "nivolumab"),] + + # 10. pazopinib 1L abd for patients who have received prior cytokine therapy + # (CHECK - what is cytokine therapy?) + sequences <- sequences[!(sequences[,1] == "nivolumab"),] + + # 11. pem_len 1L only + for (i in 2:ncol(sequences)) { + sequences <- sequences[!(sequences[,i] == "pem_len"),] + } + + # 12. sunitinib no restrictions + + # 13. tivozanib 1L and no prior VEG-F and mTOR + notafters <- c("ave_axi", "axitinib", "cabozantinib", "evrolimus", + "len_evro", "nivo_cabo", "pazopinib", + "pem_len", "sunitinib", "tivozanib") + sequences[,ncol(sequences)+1] <- TRUE + + for (i in 1:nrow(sequences)) { + #ignore sequences with no tivozanib + if (!("tivozanib" %in% sequences[i,])) next + + #keep sequences with tivozanib first line + if (sequences[i,1] == "tivozanib") next + + #remove sequences with tivozanib before VEGF or mTOR + if(match("tivozanib", sequences[i,]) > min(match(notafters,sequences[i,], nomatch = 99))) { + sequences[i,ncol(sequences)] <- FALSE + next + } + } + sequences <- sequences[sequences[,ncol(sequences)],] + sequences[,ncol(sequences)] <- NULL + + # 14. cannot use >1 IO + IO <- c("ave_axi", "ipi_nivo", "nivo_cabo", "nivolumab", "pem_len") #Note - CHECK (which are IOs?) + sequences[,ncol(sequences)+1] <- TRUE + + for (i in 1:nrow(sequences)) { + #ignore sequences with no one or less IO + if (sum(IO %in% sequences[i,]) < 2) next + + sequences[i,ncol(sequences)] <- FALSE + } + sequences <- sequences[sequences[,ncol(sequences)],] + sequences[,ncol(sequences)] <- NULL + + # 15. tivozanib, pazopinib, sunitinib can only be used after ipi_nivo + drugs <- c("tivozanib", "pazopinib", "sunitinib") + sequences[,ncol(sequences)+1] <- TRUE + for (i in 1:nrow(sequences)) { + #ignore sequences with no tivozanib, pazopinib, sunitinib + if (sum(drugs %in% sequences[i,]) == 0) next + + #remove sequences with tivozanib, pazopinib, sunitinib and no ipi_nivo + if (!("ipi_nivo" %in% sequences[i,])) { + sequences[i,ncol(sequences)] <- FALSE + next + } + + #remove sequences with tivozanib, pazopinib, sunitinib before ipi_nivo + if(match("ipi_nivo", sequences[i,]) > min(match(drugs,sequences[i,], nomatch = 99))) { + sequences[i,ncol(sequences)] <- FALSE + print(i) + next + } + } + sequences <- sequences[sequences[,ncol(sequences)],] + sequences[,ncol(sequences)] <- NULL +} + + +f_path_tx_restrict <- function(sequences, subs_to, prev_to, one_in_class) { + + s <- sequences + + # ave_axi is only permitted in first line therapy. therefore, filter out + # all subsequent uses of this treatment. Determine the columns and then apply the filter. + # Use the function f_path_RemoveFromLines to remove "ave_axi" from 2L+ + # + # Showing arguments once here, then will be a one-liner from here: + + s <- f_path_RemoveFromLines( + dat_tab = s, + lines = 2:ncol(s), + trt = "ave_axi" + ) + + # Next, axitinib 2L+ + s <- f_path_RemoveFromLines(s,1,"axitinib") + + # filter out unacceptable treatments previous to axitinib at all lines 2L+ + # sub_axi <- subs_to$axitinib + s <- f_path_notAllowedAfter(perms = s,trt = "axitinib",rule = subs_to$axitinib) + s <- f_path_onlyAllowedAfter(perms = s,trt = "axitinib",rule = subs_to$axitinib) + + # To QC this function e.g. sub_axi %in% unique(unlist(unique(s[V2 == "axitinib",list(V3,V4,V5,V6)]),use.names = F)) + + # cabo is allowed 1L as mono, OR 2L+ for those that have had a VEGF before + + s <- f_path_notAllowedAfter(s,"cabozantinib",subs_to$cabozantinib) + s <- f_path_onlyAllowedAfter(s,"cabozantinib",subs_to$cabozantinib) + + # evero post vegf, 2L+ + s <- f_path_RemoveFromLines(s,1,"evero") + s <- f_path_notAllowedAfter( s,"everolimus",subs_to$everolimus) + s <- f_path_onlyAllowedAfter(s,"everolimus",subs_to$everolimus) + + # ipi_nivo only 1L + s <- f_path_RemoveFromLines(s,2:ncol(s),"ipi_nivo") + + # len_evero is only allowed after 1 prior VEGF + # I do not think the current functions handle this - to discuss + s <- f_path_RemoveFromLines(s,1,"len_evero") + s <- f_path_notAllowedAfter( s,"len_evero",subs_to$len_evero) + s <- f_path_onlyAllowedAfter(s,"len_evero",subs_to$len_evero) + + # nivo_cabo 1L + s <- f_path_RemoveFromLines(s,2:ncol(s),"nivo_cabo") + + # nivo mono only 2L+ + s <- f_path_RemoveFromLines(s,1,"nivolumab") + s <- f_path_notAllowedAfter( s,"len_evero",subs_to$nivolumab) + s <- f_path_onlyAllowedAfter(s,"len_evero",subs_to$nivolumab) + +# pazo is 1L or no prior VEGF + # check if this handles or correctly + # also applies to sunitinib and tivozanib + + s <- f_path_notAllowedBefore( s,"tivozanib",subs_to$tivozanib) + s <- f_path_onlyAllowedAfter(s,"tivozanib",subs_to$tivozanib) + + # pem_len 1L + s <- f_path_RemoveFromLines(s,2:ncol(s),"pem_len") + + # sunitinib is 1L or no prior VEGF + + s <- f_path_notAllowedBefore( s,"sunitinib",subs_to$sunitinib) + s <- f_path_onlyAllowedAfter(s,"sunitinib",subs_to$sunitinib) + + # tivozanib 1L or no prior VEGF + s <- f_path_notAllowedBefore( s,"pazopinib",subs_to$pazopinib) + s <- f_path_onlyAllowedAfter(s,"pazopinib",subs_to$pazopinib) + + # Cannot repeat treat with IOs. for each io, impose not allowed after and not + # allowed before, for the other io's in the vector. this should make it impossible + # for a double treat to slip through + s <- Reduce( + x = one_in_class$io, + accumulate = FALSE, + init = s, + f = function(prev, io) { + not_allowed_after_these <- one_in_class$io[which(one_in_class$io != io)] + prev <- f_path_notAllowedAfter(prev,io,not_allowed_after_these) + out <- f_path_notAllowedBefore(prev,io,not_allowed_after_these) + } + ) + + # return the possible sequences now that treatment rules have been applied + return(s) +} + diff --git a/3_Functions/sequencing/sequences.R b/3_Functions/sequencing/sequences.R new file mode 100644 index 0000000..5a69fd1 --- /dev/null +++ b/3_Functions/sequencing/sequences.R @@ -0,0 +1,521 @@ +# Sequencing functions + +#' Function to generate treatment sequences through generating all permutations of comparators +#' +#' +#' @param comparators A vector of comparator names. Note that the +#' @param maxlines The maximum number of treatment lines (such that each row of permutations output has that many columns) +#' +#' +f_generate_sequences <- function(comparators, maxlines = 4) { + + strategies <- list() + + # The maximum number of living states is the number of lines +1 (for BSC). + # The number of model health states is number of lines +2 to include death too + max_living_states <- maxlines + 1 + + # List out all the possible treatment pathways by line, entering blank values + # when looking at lines past the first. So for 5th line patients there's only their + # treatment plus BSC to follow. + # + # lapply makes the list per line, rbindlist binds data.tables efficiently + # + rbindlist(lapply(1:maxlines, function(line) { + + # For this line, the number of remaining living states is the difference between + # line and the max of living states: + living_states_left <- max_living_states-line + + # use gtools' function to efficiently generate all possible permutations + # of treatments given the possible comparators, given starting from this + # line of therapy (i.e. so that at later lines there are less columns.) + out <- data.table(permutations(n = length(comparators), r = living_states_left, v = comparators)) + + # Adding in BSC as the rightmost column (data.table naming convention is VN + # where N is number. take the number of rows, and if it's less than the max + # then add empty columns to ensure there are maxlines+1 columnns at the end + out[[paste0("V",dim(out)[2]+1)]] <- "BSC" + nco <- dim(out)[2] + + # Make sure the output always has columns equal to the number of treatment lines + if (nco < max_living_states) { + ncol_to_add <- max_living_states - nco + for (col_to_add in 1:ncol_to_add) { + out[[paste0("V",nco+col_to_add)]] <- "" + } + } + + return(out) + })) +} + +# Functions to implement sequencing rules based on lists of what treatments are allowed after each other +f_get_only_after_lists <- function(i, population) { + output <- i[grep("only_after", names(i))] + output <- output[!(grepl("only_after_one", names(output)))] + output <- output[!(grepl("_2L_only_after", names(output)))] + output <- output[grep(paste0("pop", population), names(output))] + names(output) <- sub(paste0("List_","pop",population,"_"), "", names(output)) + names(output) <- sub("_only_after", "", names(output)) + f_check_drugnames(i, output) + return(output) +} + +f_get_not_immediate_after_lists <- function(i, population) { + output <- i[grep("not_immediate_after", names(i))] + output <- output[grep(paste0("pop", population), names(output))] + names(output) <- sub(paste0("List_","pop",population,"_"), "", names(output)) + names(output) <- sub("_not_immediate_after", "", names(output)) + f_check_drugnames(i, output) + return(output) +} + +f_get_one_in_list_lists <- function(i, population) { + output <- i[grep("one_allowed", names(i))] + output <- output[grep(paste0("pop", population), names(output))] + names(output) <- sub(paste0("List_","pop",population,"_"), "", names(output)) + names(output) <- sub("_only_one_allowed", "", names(output)) + f_check_drugnames(i, output) + return(output) +} + +f_get_only_after_one_lists <- function(i, population) { + output <- i[grep("only_after_one", names(i))] + output <- output[grep(paste0("pop", population), names(output))] + names(output) <- sub(paste0("List_","pop",population,"_"), "", names(output)) + names(output) <- sub("_only_after_one", "", names(output)) + f_check_drugnames(i, output) + return(output) +} + +f_get_2L_only_after_lists <- function(i, population) { + output <- i[grep("2L_only_after", names(i))] + output <- output[grep(paste0("pop", population), names(output))] + names(output) <- sub(paste0("List_","pop",population,"_"), "", names(output)) + names(output) <- sub("_2L_only_after", "", names(output)) + f_check_drugnames(i, output) + return(output) +} + +f_get_2L_only_immediate_after_lists <- function(i, population) { + output <- i[grep("_2L_only_immediate_after", names(i))] + output <- output[grep(paste0("pop", population), names(output))] + names(output) <- sub(paste0("List_","pop",population,"_"), "", names(output)) + names(output) <- sub("_2L_only_immediate_after", "", names(output)) + f_check_drugnames(i, output) + return(output) +} + +f_get_2L_only_one_lists <- function(i, population) { + output <- i[grep("_2L_only_one", names(i))] + output <- output[grep(paste0("pop", population), names(output))] + names(output) <- sub(paste0("List_","pop",population,"_"), "", names(output)) + names(output) <- sub("_2L_only_one", "", names(output)) + f_check_drugnames(i, output) + return(output) +} + +f_get_allowed_lists <- function(i, population) { + output <- i[grep("allowed", names(i))] + output <- output[grep(paste0("pop", population), names(output))] + output <- output[!(grepl("only_one_allowed", names(output)))] + names(output) <- sub(paste0("List_","pop",population,"_"), "", names(output)) + f_check_drugnames(i, output) + return(output) +} + +f_get_L1_lists <- function (i, population){ + output <- i[grep("_1L", names(i))] + output <- output[grep(paste0("pop", population), names(output))] + output <- unlist(output[[1]]) + f_check_drugnames(i, output) + return(output) +} + +f_get_L2_lists <- function (i, population){ + output <- i[grep("_2L", names(i))] + output <- output[!(grepl("_2L_only_after", names(output)))] + output <- output[grep(paste0("pop", population), names(output))] + output <- unlist(output[[1]]) + f_check_drugnames(i, output) + return(output) +} + +f_get_L3_lists <- function (i, population){ + output <- i[grep("_3L", names(i))] + output <- output[grep(paste0("pop", population), names(output))] + output <- unlist(output[[1]]) + f_check_drugnames(i, output) + return(output) +} + +f_get_L4_lists <- function (i, population){ + output <- i[grep("_4L", names(i))] + output <- output[grep(paste0("pop", population), names(output))] + output <- unlist(output[[1]]) + f_check_drugnames(i, output) + return(output) +} + +#' Check for typos in drug names +#' Verifies that all names - name of list and names of drugs entered are correct +f_check_drugnames <- function(i, output) { + if(sum(unlist(output) %in% i$List_comparators) != length(unlist(output))) { + message("One or more drugs not found in following list(s).") + print(output) + stop("Check source file for typos") + } +} + + +#' Utility function to apply the "only after" ruleset to a treatment using subset +#' filtering. +#' +#' Note that this is a reverse filter (note the ! in front of the conditions). Handy +#' for only returning those results which don't match the conditions. in this case +#' it's essentially like performing a filter for those conditions. +#' +#' @description At each line, the treatments in rule aren't allowed after trt. filter out. +#' +#' @param perms permutatins of possible treatments +#' @param trt the treatment to apply this filtering alg to +#' @param rule a character vector of those treatments that aren't allowed after trt +#' + +f_path_onlyAllowedAfter <- function(perms, trt, rule) { + #objective of function is to identify perms that violate rule and exclude + #so flag perms that have trt before drugs in rule + cat("applying rule.", trt, "is only allowed after", rule,"\n") + cat("Permutations before applying rule:", nrow(perms), "\n") + #first off delete perms with trt first line + perms <- perms[perms[,1] != trt,] + + for (n in 2:ncol(perms)) { + # as.data.frame needed to ensure code works for when n=ncol(perms) + # (as it reduces to a character vector and crashes the code) + + # flag perms with any of the drugs in 'rule' before line n + rule_drugs_before_line_n <- as.data.frame(apply(as.data.frame(perms[,1:(n-1)]), 1, function(x) x %in% rule)) + if (ncol(rule_drugs_before_line_n)>nrow(rule_drugs_before_line_n)) rule_drugs_before_line_n <- t(rule_drugs_before_line_n) #needed to ensure format of temp for same reason as.data.frame is needed + rule_drugs_before_line_n <- as.logical(apply(rule_drugs_before_line_n, 1, sum)) + + #flag perms with trt in line n + trt_in_line_n <- perms[,n] == trt + + violators <- trt_in_line_n & !rule_drugs_before_line_n + + + # #temp lines for testing only + # perms[,6] <- rule_drugs_before_line_n + # perms[,7] <- trt_in_line_n + # perms[,8] <- violators + + #remove violating perms + perms <- perms[!violators,] + + } + + cat("Permutations after applying rule :", nrow(perms),"\n") + return(perms) +} + +f_path_notAllowedImmediateAfter <- function(perms, trt, rule) { + #objective of function is to identify perms that violate rule and exclude + #so flag perms that have trt before drugs in rule + cat("applying rule.", trt, "is not allowed immediately after", rule,"\n") + cat("Permutations before applying rule:", nrow(perms), "\n") + + for (n in 2:ncol(perms)) { + # as.data.frame needed to ensure code works for when n=ncol(perms) + # (as it reduces to a character vector and crashes the code) + + # flag perms with any of the drugs in 'rule' in line n-1 + rule_drugs_before_line_n <- as.data.frame(apply(as.data.frame(perms[,(n-1)]), 1, function(x) x %in% rule)) + if (ncol(rule_drugs_before_line_n)>nrow(rule_drugs_before_line_n)) rule_drugs_before_line_n <- t(rule_drugs_before_line_n) #needed to ensure format of temp for same reason as.data.frame is needed + rule_drugs_before_line_n <- as.logical(apply(rule_drugs_before_line_n, 1, sum)) + + #flag perms with trt in line n + trt_in_line_n <- perms[,n] == trt + + violators <- trt_in_line_n & rule_drugs_before_line_n + + + # #temp lines for testing only + # perms[,6] <- rule_drugs_before_line_n + # perms[,7] <- trt_in_line_n + # perms[,8] <- violators + # + #remove violating perms + perms <- perms[!violators,] + + } + + cat("Permutations after applying rule :", nrow(perms),"\n") + return(perms) +} + +f_path_one_in_list <- function(perms, trt, rule) { + #objective of function is to identify perms that violate rule and exclude + #so flag perms that have trt before drugs in rule + cat("applying rule", trt, ":", rule, "cannot be in one permutation\n") + cat("Permutations before applying rule:", nrow(perms), "\n") + + temp <- apply(perms, 1, function(x) sum(x %in% rule)) + + ##temp lines for testing only + #perms[,6] <- temp + + violators <- temp > 1 + + #remove violating perms + perms <- perms[!violators,] + + cat("Permutations after applying rule :", nrow(perms),"\n") + return(perms) +} + +f_path_only_after_one <- function(perms, trt, rule) { + #objective of function is to identify perms that violate rule and exclude + cat("applying rule:", trt, "can only be after ONE of", rule, "\n") + cat("Permutations before applying rule:", nrow(perms), "\n") + + #first off delete perms with trt first line + perms <- perms[perms[,1] != trt,] + + #if drug is 2L then can only have 1 of the tx in rule so count from col 3 + #exclude last column as this is always BSC + for (n in 3:(ncol(perms)-1)) { + #if statement below ensures code only runs when there are at least 2 different + #entries in the perms column. If is only one then is only BSC or "" + #code crashes if empty column of "" so this stops it + if (length(unique(perms[,n]))>1) { + # flag perms with any of the drugs in 'rule' before line n + rule_drugs_before_line_n <- as.data.frame( + apply(as.data.frame(perms[,1:(n-1)]), 1, function(x) x %in% rule) + ) + if (ncol(rule_drugs_before_line_n) > nrow(rule_drugs_before_line_n)) { + rule_drugs_before_line_n <- t(rule_drugs_before_line_n) + } #this line needed to ensure format of temp for same reason as.data.frame is needed + + rule_drugs_before_line_n <- apply(rule_drugs_before_line_n, 1, sum) + rule_drugs_before_line_n <- rule_drugs_before_line_n >= 2 + + #flag perms with trt in line n + trt_in_line_n <- perms[,n] == trt + + violators <- trt_in_line_n & rule_drugs_before_line_n + + + # #temp lines for testing only + # perms[,6] <- rule_drugs_before_line_n + # perms[,7] <- trt_in_line_n + # perms[,8] <- violators + + #remove violating perms + perms <- perms[!violators,] + } + } + + cat("Permutations after applying rule :", nrow(perms),"\n") + return(perms) + +} + +f_path_allowed <- function(perms, rule) { + #objective of function is to identify perms that violate rule and exclude + cat("applying rule:", rule, "are only allowed treatments.\n") + cat("Permutations before applying rule:", nrow(perms), "\n") + + rule <- c(rule, "BSC", "") + + for (n in 1:ncol(perms)) { + perms <- perms[perms[,n] %in% rule,] + } + + cat("Permutations after applying rule :", nrow(perms),"\n") + return(perms) + +} + +f_path_drug_lines <- function(perms, L1, L2, L3, L4){ + cat("applying rule: drug line restrictions.\n") + cat("Permutations before applying rule:", nrow(perms), "\n") + n <- 0 + for (line in list(L1,L2,L3,L4)){ + n <- n + 1 + line <- c(line, "BSC", "") + #print(line) + perms <- perms[(perms[,n] %in% line),] + } + cat("Permutations after applying rule :", nrow(perms),"\n") + return(perms) +} + +f_path_2L_only_after <- function(perms, trt, rule) { + #objective of function is to identify perms that violate rule and exclude + cat("applying rule:", trt, "as 2L+ only allowed after", rule, "\n") + cat("Permutations before applying rule:", nrow(perms), "\n") + + #exclude last column as this is always BSC + for (n in 2:(ncol(perms)-1)) { + + # flag perms with any of the drugs in 'rule' before line n + rule_drugs_before_line_n <- as.data.frame( + apply(as.data.frame(perms[,1:(n-1)]), 1, function(x) x %in% rule) + ) + if (ncol(rule_drugs_before_line_n) > nrow(rule_drugs_before_line_n)) { + rule_drugs_before_line_n <- t(rule_drugs_before_line_n) + } #this line needed to ensure format of temp for same reason as.data.frame is needed + + rule_drugs_before_line_n <- apply(rule_drugs_before_line_n, 1, sum) + rule_drugs_before_line_n <- rule_drugs_before_line_n >= 1 + + #flag perms with trt in line n + trt_in_line_n <- perms[,n] == trt + + violators <- trt_in_line_n & !rule_drugs_before_line_n + + + # #temp lines for testing only + # perms[,6] <- rule_drugs_before_line_n + # perms[,7] <- trt_in_line_n + # perms[,8] <- violators + + #remove violating perms + perms <- perms[!violators,] + } + + cat("Permutations after applying rule :", nrow(perms),"\n") + return(perms) + +} + +f_path_2L_only_immediate_after <- function(perms, trt, rule) { + #objective of function is to identify perms that violate rule and exclude + cat("applying rule:", trt, "as 2L+ only allowed immediately after", rule, "\n") + cat("Permutations before applying rule:", nrow(perms), "\n") + + #exclude last column as this is always BSC + for (n in 2:(ncol(perms)-1)) { + #flag perms with trt AND (rule[1] or rule[..] or rule[n]) + perms_with_rule_drugs <- apply(perms,2, function(i){i %in% rule}) + perms_with_rule_drugs <- apply(perms_with_rule_drugs,1,function(i){sum(i)>0}) + + perms_with_trt <- apply(perms,2, function(i){i %in% trt}) + perms_with_trt <- apply(perms_with_trt, 1, function(i){sum(i)>0}) + + perms_with_trt_AND_rule <- perms_with_rule_drugs & perms_with_trt + + trt_at_line_n <- perms[,n] == trt + rule_not_at_line_n_minus_1 <- !(perms[,(n-1)] %in% rule) + + violators <- perms_with_trt_AND_rule & trt_at_line_n & rule_not_at_line_n_minus_1 + + # #temp lines for testing only + # perms[,6] <- perms_with_trt_AND_rule + # perms[,7] <- trt_at_line_n + # perms[,8] <- rule_not_at_line_n_minus_1 + # perms[,9] <- violators + + #remove violating perms + perms <- perms[!violators,] + } + + cat("Permutations after applying rule :", nrow(perms),"\n") + return(perms) + +} + + +f_path_2L_only_one <- function(perms, trt, rule) { + #objective of function is to identify perms that violate rule and exclude + cat("applying rule:", trt, "as 2L+ only allowed one of", rule, "\n") + cat("Permutations before applying rule:", nrow(perms), "\n") + + + rule_drugs_2Lplus <- as.data.frame( + apply(as.data.frame(perms[,2:ncol(perms)]), 1, function(x) x %in% rule) + ) + + if (ncol(rule_drugs_2Lplus) > nrow(rule_drugs_2Lplus)) { + rule_drugs_2Lplus <- t(rule_drugs_2Lplus) + } + + rule_drugs_2Lplus <- apply(rule_drugs_2Lplus, 1, sum) + violators <- rule_drugs_2Lplus > 1 + + #remove violating perms + perms <- perms[!violators,] + + cat("Permutations after applying rule :", nrow(perms),"\n") + return(perms) + +} + +f_path_tx_restrict <- function(sequences, + allowed, L1, L2, L3, L4, + only_after, not_immediate_after, + one_in_list, only_after_one, + L2_only_after, L2_only_immediate_after, + L2_only_one) { + + s <- sequences + + cat("Dropping drugs not allowed for this population.\n") + s <- f_path_allowed(s, allowed[[1]]) + + s <- f_path_drug_lines(s, L1, L2, L3, L4) + + if (length(only_after) > 0) { + for (n in 1:length(only_after)) { + print(names(only_after)[n]) + s <- f_path_onlyAllowedAfter(s, names(only_after)[n], only_after[[n]]) + } + } + + if (length(not_immediate_after) > 0) { + for (n in 1:length(not_immediate_after)) { + print(names(not_immediate_after)[n]) + s <- f_path_notAllowedImmediateAfter(s, names(not_immediate_after)[n], not_immediate_after[[n]]) + } + } + + if (length(one_in_list) > 0) { + for (n in 1:length(one_in_list)) { + print(names(one_in_list)[n]) + s <- f_path_one_in_list(s, names(one_in_list)[n], one_in_list[[n]]) + } + } + + if (length(only_after_one) > 0) { + for (n in 1:length(only_after_one)) { + print(names(only_after_one)[n]) + s <- f_path_only_after_one(s, names(only_after_one)[n], only_after_one[[n]]) + } + } + + if (length(L2_only_after) > 0) { + for (n in 1:length(L2_only_after)) { + print(names(L2_only_after)[n]) + s <- f_path_2L_only_after(s, names(L2_only_after)[n], L2_only_after[[n]]) + } + } + + if (length(L2_only_immediate_after) > 0) { + for (n in 1:length(L2_only_immediate_after)) { + print(names(L2_only_immediate_after)[n]) + s <- f_path_2L_only_immediate_after(s, names(L2_only_immediate_after)[n], L2_only_immediate_after[[n]]) + } + } + + if (length(L2_only_one) > 0) { + for (n in 1:length(L2_only_one)) { + print(names(L2_only_one)[n]) + s <- f_path_2L_only_one(s, names(L2_only_one)[n], L2_only_one[[n]]) + } + } + + # return the possible sequences now that treatment rules have been applied + return(s) +} diff --git a/3_Functions/survival/Survival_functions.R b/3_Functions/survival/Survival_functions.R new file mode 100644 index 0000000..1619e7a --- /dev/null +++ b/3_Functions/survival/Survival_functions.R @@ -0,0 +1,2352 @@ + + +# Cleaning functions ------------------------------------------------------ + +# This function gets the KM data into the proper format to use with the survminer package +# This packages requires that column names are the same (you have to specify in advance) +# t_multiplier can be used to change the time unit of the analysis. +# time unit has been specified in weeks + +f_ce_km_MakeDatSurvFriendly <- function(Data_required, time_column, event_column, t_multiplier = 1) { + dat <- Data_required[,c(time_column,event_column),with =FALSE] + colnames(dat) <- c("t", "ec") + dat[,"t"] <- dat[,"t"] * t_multiplier + return(dat) +} + + + +# Extrapolation functions ------------------------------------------------- + + +# ~ Routing function ------------------------------------------------------ + + +#' router function for extrapolations. Extrapolation functions called are defined +#' after this function so you can read things in order. +#' +#' @param v_cycles vector of cycles +#' @param coefficients coefficients for this distribution +#' @param distribution distribution identifier +#' +f_extrapolate <- function(v_cycles, coefs, distribution) { + + if(is.null(coefs)) return(NULL) + + if (!distribution %in% c("gengamma","exp","weibull","lnorm","gamma","gompertz","llogis","lognormal")) stop("distribution not listed") + + # Check that the coefs match the distribution stated + + if(distribution == "gengamma") {if (sum(names(coefs) != c("mu","sigma","Q")) > 0) {stop("incorrect coefficients")} + } else if(distribution == "exp") {if (length(coefs) > 1) {stop("incorrect coefficients")} + } else if(distribution == "weibull") {if (sum(names(coefs) != c("shape", "scale")) > 0) {stop("incorrect coefficients")} + } else if(distribution == "lnorm") {if (sum(names(coefs) != c("meanlog", "sdlog")) > 0) {stop("incorrect coefficients")} + } else if(distribution == "gamma") {if (sum(names(coefs) != c("shape", "rate")) > 0) {stop("incorrect coefficients")} + } else if(distribution == "gompertz") {if (sum(names(coefs) != c("shape", "rate")) > 0) {stop("incorrect coefficients")} + } else if(distribution == "llogis") {if (sum(names(coefs) != c("shape", "scale")) > 0) {stop("incorrect coefficients")} + } + + # select the correct distribution to apply and produce table vector of S(t) per cycle + + if (distribution == "exp") {return(function_apply_exp(v_cycles, coefs))} + if (distribution == "weibull") {return(function_apply_weibull(v_cycles, coefs))} + if (distribution == "gengamma") {return(function_apply_gengamma(v_cycles, coefs))} + if (distribution == "lnorm") {return(function_apply_lnorm(v_cycles, coefs))} + if (distribution == "gamma") {return(function_apply_gamma(v_cycles, coefs))} + if (distribution == "gompertz") {return(function_apply_gompertz(v_cycles, coefs))} + if (distribution == "llogis") {return(function_apply_llogis(v_cycles, coefs))} + + +} + + +# ~ extrapolation functions ----------------------------------------------- + + +# these functions each produce a vector of S(t) per cycle for the selected distriubtion + +function_apply_exp <- function(v_cycles, rate) { + output<- exp(-1* c(exp(rate)) * v_cycles) + output +} + +function_apply_weibull <- function(v_cycles, coefs) { + output<- 1 - pweibull(v_cycles, exp(coefs[1]), exp(coefs[2])) + output +} + +function_apply_lnorm <- function(v_cycles, coefs) { + output<- 1 - plnorm(v_cycles, coefs[1], 1/exp(-1*coefs[2])) + output +} + +function_apply_llogis <- function(v_cycles, coefs) { + output<- 1/(1+(v_cycles*exp(-1*coefs[2]))^(1/exp(-1*coefs[1]))) + output +} + +function_apply_gengamma <- function(v_cycles, coefs) { + pgamma <- pgamma(((-1*coefs[3])^-2)*exp(-1*coefs[3]*-((log(v_cycles)-(coefs[1]))/exp(coefs[2]))), ((-1*coefs[3])^-2), scale = 1) + output<- if (coefs[3]<0) {pgamma} else {1-pgamma} + output +} + +function_apply_gompertz <- function(v_cycles, coefs) { + output<- exp((-1/coefs[1])*exp(coefs[2])*(exp(coefs[1]*v_cycles)-1)) + output +} + +function_apply_gamma <- function(v_cycles, coefs) { + output<- 1-((exp(gammaln(exp(coefs[1])))*pgamma(exp(coefs[2])*v_cycles,exp(coefs[1]),1))/gamma(exp(coefs[1]))) + output +} + + +# plotting functions ------------------------------------------------------ + + + +# function to produce survival analysis output plots + +f_surv_plot <- function(v_years, Plot_data, ipd, xlim, km_time_mult) { + + km_line_data <- f_ce_km_gen_KMPlotData( + DATA = ipd, + timevar = "timew", + eventvar = "event_censor" + ) + km_line_data$dist <- "KM" + km_line_data$t <- km_line_data$t * km_time_mult + + dat <- data.table(Plot_data) + dists <- dimnames(dat)[[2]] + dat$t <- v_years + dat <- melt.data.table( + data = dat, + measure.vars = dists, + variable.name = "dist", + value.name = "s_t" + ) + + dat <- dat[t <= xlim,] + dat <- rbind(km_line_data,dat) + + # colours + colour <- c( + KM = "black", + gengamma = "blue", + exp = "red", + weibull = "green", + lnorm = "purple", + gamma = "yellow", + gompertz = "orange", + llogis = "brown" + ) + size <- c( + KM = 1.5, + gengamma = 0.5, + exp = 0.5, + weibull = 0.5, + lnorm = 0.5, + gamma = 0.5, + gompertz = 0.5, + llogis = 0.5 + ) + + + ggplot(data = dat, aes(x = t, y = s_t, colour = dist)) + + geom_line() + + ylim(0,1) + + xlim(0,xlim) + + labs(x ="Years", y = "Proportion Surviving") + + theme_bw() + theme(legend.position = "bottom") + + scale_colour_manual(name="Curve Fit",values = colour) + + scale_size_manual(name="Curve Fit",values = size) + + scale_x_continuous(expand = expansion(mult = c(0,0))) + + scale_y_continuous(expand = expansion(mult = c(0,0))) + + guides( + color = guide_legend(title = NULL), + linetype = "none", + size = "none" + ) +} + + + +# Takes the survival models and draws Kaplan Meier with a risk table under it. +# Then also takes the survival extrapolations and overlays them + +f_extrap_plot <- function(SurvEstimate, Data_required, curvefits_data, time_vector, xlim, break_by) { + + # bit of reformatting + dat <- data.table(curvefits_data) + dists <- dimnames(dat)[[2]] + dat$t <- time_vector + dat <- melt.data.table( + data = dat, + measure.vars = dists, + variable.name = "dist", + value.name = "s_t" + ) + + cols <- c("KM" = "#030303", + "gengamma" = "#FF4040", + "exp" = "#0000FF", + "weibull" = "#D2691E", + "lnorm" = "#EE6A50", + "gamma" = "#556B2F", + "gompertz" = "#FF7F00", + "llogis" = "#00CDCD") + thick <- c("KM" = 1.5, + "gengamma" = 0.5, + "exp" = 0.5, + "weibull" = 0.5, + "lnorm" = 0.5, + "gamma" = 0.5, + "gompertz" = 0.5, + "llogis" = 0.5) + legend <- c("KM", + "Generalised Gamma", + "Exponential", + "Weibull", + "Lognormal", + "Gamma", + "Gompertz", + "Loglogistic") + + legend <- c("Exponential", # sorted by alphabetical order + "Gamma", + "Generalised Gamma", + "Gompertz", + "KM", + "Lognormal", + "Loglogistic", + "Weibull") + + + # plot <- ggsurvplot( + plot <- ggsurvplot( + fit = SurvEstimate, + data = Data_required, + combine = TRUE, + censor = TRUE, + risk.table = TRUE, + conf.int = TRUE, + break.x.by = break_by, + break.y.by = 0.1, + xlim = c(0, xlim), + xlab = "Years", + size = 0.72, + legend.title = '', + legend.labs = c("KM"), + risk.table.y.text.col = FALSE + ) + + plot$plot <- plot$plot + + geom_line(aes( + x = t, + y = s_t, + colour = dist + ), + data = dat, + alpha = 0.8, + linewidth = 0.3) + + scale_color_manual(values = cols, labels = legend) + + scale_size_manual(values = thick) + + scale_y_continuous(expand = expansion(c(0,0.05))) + + scale_x_continuous(expand = expansion(c(0.05,0.01))) + + guides( + color = guide_legend(title = NULL), + linetype = "none", + size = "none" + ) + suppressWarnings(suppressMessages(plot)) +} + + + + +# Makes KM plotting data to put onto a plot without having to us survminer. +# +# Could use this instead of function_extrap_plot +# +f_ce_km_gen_KMPlotData <- function(DATA, timevar, eventvar) { + #make KM testing ground + #load example data and do survfit + x <- as.data.table(DATA) + y <- Surv(time = unlist(x[,timevar, with =FALSE],use.names = F), unlist(x[,eventvar,with =FALSE],use.names = F)) + z <- survfit(y ~ 1) + + #make adjustments to data required for graph (i.e. double both datasets) + t <- rep(z$time,2) + s_t <- rep(z$surv,2) + + #sort parameters as required (by time ascending, by survival descending), adding extra data points + t <- append(0,t[order(t)]) + s_t <- append(1,append(1,s_t[order(s_t,decreasing = TRUE)]))[1:length(t)] + + #put into a dataframe and return as output of function + df <- data.table(t = t, s_t = s_t) + return(df) +} + + + +# Run all models for all data ---------------------------------------------------------------- + +#' Function to run TSD14 survival analysis for all available data by going through +#' all possible populations, lines, regimen, trials and endpoints, returning NULL +#' if the data is empty and a set of outputs if there is data. +#' +#' Contains options for producing plots (considerable impact on speed, but useful for reporting) +#' +#' @param r_pld All of the survival analyses in the PLMTE format. `i$surv$pld` in the model structure +#' @param id Identifiers for ipd - `i$id$ipd` in the model structure +#' @param lookups lookup tables to translate between numbers and labels: `i$lookup$ipd` in the model structure +#' @param distnames `i$distnames` in the model structure +#' @param cl_y cycle length in years `p$basic$cl_y` in the model structure +#' @param xlim_survplots_yr x-limit for the plots: `p$misc$plot$xlim_survplots_yr` in the model structure +#' @param t_yr vector of time in years: `p$basic$t_yr` in the model structure +#' @param draw_plots logical for drawing plots +#' @param verbose logical for extra console output that can help with debugging +#' @param min_obs cutoff for running regressions. `N 0)) { + cat(paste0( + "Survival analysis - population: ", lookups$pop[Number == trial_population, Description], + "\t line: " , lookups$line[Number == l , Description], + "\t molecule: " , lookups$mol[Number == m , Description], + "\t trial: " , tr, + "\t endpoint: " , lookups$endpoint[Number == en , Description], "\n" + )) + } + + + # If that dataset is empty (i.e., has no rows), then there's no data for it. return + # nothing + if (nrow(ipd) == 0) { + return(list( + pop = lookups$pop[ Number == trial_population,Description], + line = lookups$line[ Number == l,Description], + mol = lookups$mol[ Number == m,Description], + tr = lookups$trial[ Number == tr,Description], + endpoint = lookups$endpoint[Number == en,Description], + ipd = NULL, + fs_fits = NULL, + gof = NULL, + st = NULL, + plot = NULL + )) + } else if (nrow(ipd) < min_obs) { + + warning(paste0( + lookups$pop[Number == trial_population, Description], + " population " , lookups$line[Number == l , Description], + " " , lookups$mol[Number == m , Description], + " " , lookups$endpoint[Number == en , Description], + " from " , lookups$trial[Number == tr , Description], + " has ",nrow(ipd), "(<", min_obs, ") observations (i.e. < the minimum set by you!). Skipping this PLMTE." + ),immediate. = TRUE) + + return(list( + pop = lookups$pop[ Number == trial_population,Description], + line = lookups$line[ Number == l,Description], + mol = lookups$mol[ Number == m,Description], + tr = lookups$trial[ Number == tr,Description], + endpoint = lookups$endpoint[Number == en,Description], + ipd = NULL, + fs_fits = NULL, + gof = NULL, + st = NULL, + plot = NULL + )) + + + } else { + + # If there IS data, continue on to use that data to produce full survival analysis results. + # + # Remember that the IPD is sensitive so should not ever be saved to disk. Being stored in i + # and never saved is the way to do this. + + # Even within the multiple nesting by population, line, molecule, trial and endpoint, + # we need ANOTHER (6th) layer - the parametric model: + + + + + + + fs_fits <- lapply(distnames, function(dist) { # applying all parametric survival curves in the list of distNames + fs_fit <- flexsurvreg( + formula = Surv(t, e) ~ 1, + data = ipd, + dist = dist + ) + return(list( + coefs = coefficients(fs_fit), # coefficients for the fitted model + vcov = vcov(fs_fit), # variance covariance matrix for the fitted model + fit = c(AIC= AIC(fs_fit), BIC=BIC(fs_fit), logLik = logLik(fs_fit)) # goodness of fit statistics for the fitted model + )) + }) + + gof <- do.call(rbind, lapply(distnames, function(dist) fs_fits[[dist]]$fit)) + + + st <- matrix( + unlist(lapply(distnames, function(dist) { + f_extrapolate(t_cyc, fs_fits[[dist]]$coefs, dist) + })), + ncol = length(distnames), + dimnames = list(NULL, distnames), + byrow = FALSE + ) + + if (draw_plots) { + + plot <- { + # First the IPD is produced in a format that survminer will accept. Data must all be + # the same format with the same column names. + # this assumes no covariate adjustment. + + sm_ipd <- f_ce_km_MakeDatSurvFriendly( + Data_required = ipd, + time_column = "t", # note that this is taking IPD in weeks + event_column = "e", + t_multiplier = cl_y # data in weeks, cycle length in plot years + ) + + # get the survival analysis in the form we need for survminer + # and make the extrapolations we need for survminer + + form <- Surv(t, ec) ~ 1 + sm_surv_est <- surv_fit(formula = form, data = sm_ipd) + + # make the plot with the input data: + survival_plot <- suppressMessages(f_extrap_plot( + SurvEstimate = sm_surv_est, + Data_required = sm_ipd, + curvefits_data = st, + time_vector = t_yr, + xlim = xlim_survplots_yr, + break_by = round(xlim_survplots_yr/8,0) + )) + list( + ipd = sm_ipd, + formula = form, + plot = survival_plot + ) + } + } else { + plot <- NULL + } + + # Now that we've done everything for this dataset, return a list of the stuff + # we need for it: + return(list( + pop = lookups$pop[ Number == trial_population,Description], + line = lookups$line[ Number == l,Description], + mol = lookups$mol[ Number == m,Description], + tr = lookups$trial[ Number == tr,Description], + endpoint = lookups$endpoint[Number == en,Description], + ipd = ipd, + fs_fits = fs_fits, + gof = gof, + st = st, + plot = plot + )) + } + }) + }) + }) + }) + }) +} + + +# Reporting --------------------------------------------------------------- + +#' Function to automatically bring together graphs and goodness of fit for all TSD14 models +#' +#' @param fs_res the results from f_surv_runAllTSD14 +#' @param id the identifiers for all the numerical indices to separate out populations, lines etc +#' +f_surv_makeTSD14Report <- function(fs_res, id, lookup) { + + doc_surv <- read_docx() %>% + body_add_toc(level = 4) + + for(subgroup in names(id$pop)) { + doc_surv <- doc_surv %>% + body_add_par(paste0(lookup$pop[Number == id$pop[subgroup],Description], " population"),style = "heading 1") + for (l in names(id$line)) { + doc_surv <- doc_surv %>% + body_add_break() %>% + body_add_par(lookup$line[Number == id$line[l],Description],style = "heading 2") + for (m in names(id$mol)) { + for (tr in names(id$trial)) { + for (en in names(id$endpoint)) { + # cat(paste0(paste(subgroup, l, m, tr, en, collapse = "\t"),"\n")) + if (!is.null(fs_res[[subgroup]][[l]][[m]][[tr]][[en]]$ipd)) { + + row_to_bold <- which.min(fs_res[[subgroup]][[l]][[m]][[tr]][[en]]$gof[,"AIC"]) + + gof_tab <- round(as.data.frame(fs_res[[subgroup]][[l]][[m]][[tr]][[en]]$gof), 2) %>% + rownames_to_column(var = "Distribution") %>% + mutate(Distribution = case_when( + Distribution == "gengamma" ~ "Generalised gamma", + Distribution == "exp" ~ "Exponential", + Distribution == "weibull" ~ "Weibull", + Distribution == "lnorm" ~ "Log-normal", + Distribution == "gamma" ~ "Gamma", + Distribution == "gompertz" ~ "Gompertz", + Distribution == "llogis" ~ "Log-logistic" + )) %>% + flextable() %>% + bold(i = row_to_bold, bold = TRUE, part = "body") %>% + flextable::set_header_labels( + AIC = "A.I.C.", + BIC = "B.I.C.", + logLik = "Log likelihood" + ) %>% + autofit() + + + doc_surv <- doc_surv %>% + body_add_par("",style = "Normal") %>% + body_add_par("",style = "Normal") %>% + body_add_par(paste0( + fs_res[[subgroup]][[l]][[m]][[tr]][[en]]$mol, + ", ", + fs_res[[subgroup]][[l]][[m]][[tr]][[en]]$endpoint + ), style = "heading 3") %>% + body_add_par("",style = "Normal") %>% + body_add_normal( + paste0( + "The Figure below shows ", + fs_res[[subgroup]][[l]][[m]][[tr]][[en]]$endpoint, + " in the ", + tolower(fs_res[[subgroup]][[l]][[m]][[tr]][[en]]$pop), + " population - ", + tolower(fs_res[[subgroup]][[l]][[m]][[tr]][[en]]$line), + " ", + fs_res[[subgroup]][[l]][[m]][[tr]][[en]]$mol, + " (trial ", + tr, + "). A plot is provided for visual fit, alongside goodness-of-fit statistics for decision making" + ) + ) %>% + body_add_par("",style = "Normal") %>% + body_add_figure_legend( + legend = paste0( + "Extrapolation - ", + fs_res[[subgroup]][[l]][[m]][[tr]][[en]]$endpoint, + " in the ", + tolower(fs_res[[subgroup]][[l]][[m]][[tr]][[en]]$pop), + " population - ", + tolower(fs_res[[subgroup]][[l]][[m]][[tr]][[en]]$line), + " ", + fs_res[[subgroup]][[l]][[m]][[tr]][[en]]$mol, + " (trial ", + tr, + ")" + ), + bookmark = gsub("_", "", paste0("fig_", subgroup, l, m, tr, en)), + + ) %>% + body_add_plot(print(fs_res[[subgroup]][[l]][[m]][[tr]][[en]]$plot$plot), width = 6) %>% + body_add_par("", style = "Normal") %>% + body_add_table_legend( + paste0( + "Goodness of fit - ", + fs_res[[subgroup]][[l]][[m]][[tr]][[en]]$endpoint, + " in the ", + tolower(fs_res[[subgroup]][[l]][[m]][[tr]][[en]]$pop), + " population - ", + tolower(fs_res[[subgroup]][[l]][[m]][[tr]][[en]]$line), + " ", + fs_res[[subgroup]][[l]][[m]][[tr]][[en]]$mol, + " (trial ", + tr, + ")" + ), + bookmark = gsub("_", "", paste0("table_", subgroup, l, m, tr, en)) + ) %>% + body_add_flextable(gof_tab, + topcaption = TRUE, + split = TRUE + ) %>% + body_add_par("", style = "Normal") %>% + body_add_par("", style = "Normal") + } + } + } + } + } + } + return(doc_surv) +} + + +# Comparative efficacy ---------------------------------------------------- + +#' Function to generate an empty list representing an evidence network with space for +#' application of hazard ratios to populate modeled comparisons between different +#' drugs across all populations, lines, molecules, trials and endpoints +#' +#' +#' @param id a list containing identifiers for the different PLMTE +#' @param lookup a translator between numerical ids and descriptions of them +#' +#' +f_NMA_generateNetwork <- function(id,lookup) { + lapply(id$pop, function(trial_population) { + lapply(id$line, function(l) { + lapply(id$mol, function(m) { + lapply(id$trial, function(tr) { + lapply(id$endpoint, function(en) { + list( + dest = list( + pop = paste0("pop_" ,trial_population), + line = paste0("line_" ,l), + mol = paste0("mol_" ,m), + trial = paste0("trial_" ,tr), + endpoint = paste0("endpoint_" ,en) + ), + orig = list( + pop = NULL, + line = NULL, + mol = NULL, + trial = NULL, + endpoint = NULL, + dist = NULL, + source = NULL + ), + hr = 1, + fp = list() + ) + }) + }) + }) + }) + }) +} + + +#' Function to pull out one sample from CODA in the RCC format. Notice that the +#' naming of the columns MUST match exactly. Spelling errors will break the function. +#' +#' @param dat data (CODA sample), run must have column called "Run" +#' @param n which iteration to pull for each grouping +#' +f_PHNMA_getSample <- function(dat, n) dat[Run == n, .(HR = mean(HR)), by = list(Population, + Line, + Molecule, + Endpoint, + Reference.treatment, + Reference.trial)] + +# e.g. Get the 2nd sample HRs +# hr_table <- f_PHNMA_getSample(i$PHNMA$data,2) + + + + +#' Function to "link" the result of f_NMA_generateNetwork by entering hazard ratios +#' and origin identifiers to the foundations it lays +#' +#' @param network the result of f_NMA_generateNetwork +#' @param hr_table a table of HRs in line with the formatting of the CODA sample for the RCC project. Numbering MUST align with id for f_NMA_generateNetwork +#' @param rel_eff_table the named range R_table_eff_data_settings from the excel front end +#' +f_NMA_linkPHNMA <- function(network, hr_table) { + + # Simply put, this function takes each row of the NMA HR table and puts that information + # along with origin information into network. + + Reduce( + x = 1:nrow(hr_table), + init = network, + accumulate = FALSE, + f = function(prev, hr_tab_row) { + + dat <- as.list(hr_table[hr_tab_row,]) + + # Establish the destination, d as the labeling that is used to identify + # list elements. trial_0 is asserted because application of comp eff isn't really + # associated with any trial, so internal numbering within group will always + # be 0 + + d <- list( + pop = paste0("pop_" , .subset2(dat,"Population")), + line = paste0("line_" , .subset2(dat,"Line")), + mol = paste0("mol_" , .subset2(dat,"Molecule")), + trial = paste0("trial_" , .subset2(dat,"Reference.trial")), # trial_0 assumed for HR applied - could be a flag like NMA but that creates complication later + endpoint = paste0("endpoint_", .subset2(dat,"Endpoint")) + ) + + # Extract the PLMTE for this destination, so that we have a list to manipulate + # directly without fear of overwriting anything :) + + plmte <- f_misc_get_plmte(prev,d) + + # Derive the origin from dat, these values can then go in plmte$orig + + o <- list( + pop = paste0("pop_" , .subset2(dat,"Population")), + line = paste0("line_" , .subset2(dat,"Line")), + mol = paste0("mol_" , .subset2(dat,"Reference.treatment")), + trial = paste0("trial_" , .subset2(dat,"Reference.trial")), + endpoint = paste0("endpoint_", .subset2(dat,"Endpoint")) + ) + + # Place the relevant information into the destination PLMTE. Whether or not + # this information is used in the end is decided in the function which follows + # this one, called f_NMA_AddAssumptionsToNetwork + + if (!is.null(o$pop)) plmte$orig$pop <- o$pop + if (!is.null(o$line)) plmte$orig$line <- o$line + if (!is.null(o$mol)) plmte$orig$mol <- o$mol + if (!is.null(o$trial)) plmte$orig$trial <- o$trial + if (!is.null(o$endpoint)) plmte$orig$endpoint <- o$endpoint + if (!is.null(dat$HR)) plmte$hr <- dat$HR + + # Take the updated plmte, being careful not to overwrite anything and + # slot it back into its destination, returning the updated network. + + prev[[d$pop]][[d$line]][[d$mol]][[d$trial]][[d$endpoint]] <- plmte + return(prev) + } + ) +} + + +#' Function to "link" the result of f_NMA_generateNetwork by entering fractional +#' polynomial network meta analysis (FPNMA) parameters table (which contains numerical +#' identifiers for population line molecule trial and endpoint, PLMTE) +#' +#' @param network the result of f_NMA_generateNetwork +#' @param hr_table a table of HRs parameters in line with the formatting of the CODA sample for the RCC project. Numbering ids MUST align with id for f_NMA_generateNetwork! +#' @param rel_eff_table the named range R_table_eff_data_settings from the excel front end for any other bits/assumptions that're required +#' +f_NMA_linkFPNMA <- function(network, destinations, hr_table, time_horizon) { + + # This function takes each item of hr_list and puts that information + # along with origin information into network. + + Reduce( + x = 1:nrow(destinations), + init = network, + accumulate = FALSE, + f = function(prev, dest_row_number) { + + dat <- as.list(destinations[dest_row_number,]) + + # Establish the destination, d as the labeling that is used to identify + # list elements. trial_0 is asserted because application of comp eff isn't really + # associated with any trial, so internal numbering within group will always + # be 0 + + d <- list( + pop = paste0("pop_" , .subset2(dat,"Population")), + line = paste0("line_" , .subset2(dat,"Line")), + mol = paste0("mol_" , .subset2(dat,"Molecule")), + trial = paste0("trial_" , .subset2(dat,"Reference.trial")), # trial_0 assumed for HR applied - could be a flag like NMA but that creates complication later + endpoint = paste0("endpoint_", .subset2(dat,"Endpoint")) + ) + dN <- list( + pop = .subset2(dat,"Population"), + line = .subset2(dat,"Line"), + mol = .subset2(dat,"Molecule"), + trial = .subset2(dat,"Reference.trial"), + endpoint = .subset2(dat,"Endpoint") + ) + + # Extract the PLMTE for this destination, so that we have a list to manipulate + # directly without fear of overwriting anything :) + plmte <- prev[[d$pop]][[d$line]][[d$mol]][[d$trial]][[d$endpoint]] + + # Derive the origin from dat, these values can then go in plmte$orig + o <- list( + pop = paste0("pop_" , .subset2(dat,"Population")), + line = paste0("line_" , .subset2(dat,"Line")), + mol = paste0("mol_" , .subset2(dat,"Reference.treatment")), + trial = paste0("trial_" , .subset2(dat,"Reference.trial")), + endpoint = paste0("endpoint_", .subset2(dat,"Endpoint")) + ) + + # Filter down hr_table to matching destination given origin reference treatment + # and keep the time-varying HRs. + fp <- + list(HR = hr_table[Population == dN$pop & + Line == dN$line & + Molecule == dN$mol & + Reference.treatment == .subset2(dat,"Reference.treatment") & + Reference.trial == dN$trial & + Endpoint == dN$endpoint, HR]) + + if(length(fp$HR) < time_horizon+1) { + fp_extend <- rep(fp$HR[length(fp$HR)],((time_horizon+1)-length(fp$HR))) + fp$HR <- c(fp$HR,fp_extend)[1:(time_horizon+1)] + } + + # Place the relevant information into the destination PLMTE. Whether or not + # this information is used in the end is decided in the function which follows + # this one, called f_NMA_AddAssumptionsToNetwork + if (!is.null(o$pop)) plmte$orig$pop <- o$pop + if (!is.null(o$line)) plmte$orig$line <- o$line + if (!is.null(o$mol)) plmte$orig$mol <- o$mol + if (!is.null(o$trial)) plmte$orig$trial <- o$trial + if (!is.null(o$endpoint)) plmte$orig$endpoint <- o$endpoint + plmte$fp <- fp + + # Take the updated plmte, being careful not to overwrite anything and + # slot it back into its destination, returning the updated network. + prev[[d$pop]][[d$line]][[d$mol]][[d$trial]][[d$endpoint]] <- plmte + return(prev) + } + ) +} + + +#' Function to take results after running f_NMA_generateNetwork and then f_NMA_linkPHNMA +#' and then apply assumption-based HRs (e.g. same as another treatment/endpoint etc) +#' +#' WARNING: I expect the table in excel to be complete and to provide the HRs +#' required, rather than working them out based on the drop down selection +#' The function below simply adds the HR in question to the efficacy network +#' list. It won't work it out for you!! +#' +#' +#' NOTE: This function is ONLY for those rows that are NOT trial survival analysis, +#' FP NMA or PH NMA. Everything else is assumption based, and that is what +#' this function applies! +#' +#' @param network `p$releff$network` +#' @param phnma_table `p$releff$CODA$PH` +#' @param fpnma_table `p$releff$CODA$FP` +#' @param fpnma_destinations `p$releff$fp_dest` +#' @param excel_table `data.table(i$R_table_eff_data_settings)` +#' @param trial_flag `i$List_eff_datasources[1]` +#' @param fpnma_flag `i$List_eff_datasources[3]` +#' @param phnma_flag `i$List_eff_datasources[2]` +#' @param et_flag `i$List_eff_datasources[4]` +#' @param ahr_flag `i$List_eff_datasources[5]` +#' @param verbose `qc_mode` +#' +f_NMA_AddAssumptionsToNetwork <- + function(network, + phnma_table, + fpnma_table, + fpnma_destinations, + excel_table, + trial_flag = "Trial survival anlaysis", + fpnma_flag = "FP_NMA", + phnma_flag = "PH_NMA", + et_flag = "Assume equal to", + ahr_flag = "Apply HR to", + verbose = FALSE, + psa_flag = FALSE) { + + Reduce( + x = 1:nrow(excel_table), + init = network, + accumulate = FALSE, + f = function(prev, excel_tab_row) { + + dat <- as.list(excel_table[excel_tab_row,]) + + # add a message showing progress and what's happening: + if (all(verbose,dat$Include.in.this.analysis. == "Yes")) { + cat(paste0( + paste0( + "R_table_eff_data_settings row ",excel_tab_row," - dest: pop_", + dat$Population, + "$line_", + dat$Treatment.line, + "$mol_", + dat$Molecule, + "$trial_", + dat$Origin.trial, + "$endpoint_", + dat$End.point, + " | method: ", + dat$Effectiveness.data.source, + " | orig: pop_", + dat$Origin.population, + "$line_", + dat$Origin.line, + "$mol_", + dat$Origin.treatment, + "$trial_", + dat$Origin.trial, + "$endpoint_", + dat$Origin.endpoint, + "\n" + ) + )) + } + + # Cycle through the different possibilities, handling the case for different + # dropdown options one at a time. The cases are: + # + # - trial_flag: directly based on PLD extrapolation. sampling is done using parameters outside of this function. + # - phnma_flag: PHNMA. HRs are linked corresponding to the CODA sample, but overrides are to be enacted. CODA sampling is done externally + # - fpnma_flag: PHNMA. time-varying HRs are linked corresponding to the input data, but overrides are to be enacted. sampling done within. + # - Assume equal to: put in origin and set HR to 1, simple :). doesn't vary in PSA + # - apply HR to: put in origin and set HR to the "HR to apply" column in the table. also simple. for PSA use bounds externally, replacing the values in the table + + d <- list( + pop = paste0("pop_",dat$Population), + line = paste0("line_",dat$Treatment.line), + mol = paste0("mol_",dat$Molecule), + trial = paste0("trial_",dat$Origin.trial), + endpoint = paste0("endpoint_",dat$End.point) + ) + dN <- list( + pop = dat$Population, + line = dat$Treatment.line, + mol = dat$Molecule, + trial = dat$Origin.trial, + endpoint = dat$End.point + ) + # If the user in Excel has excluded by selecting no in the inclusion dropdown, + # exclude it! + if (dat$Include.in.this.analysis. == "No") { + prev[[d$pop]][[d$line]][[d$mol]][[d$trial]][[d$endpoint]]$include <- FALSE + if (all(verbose,dat$Include.in.this.analysis. == "No")) { + cat(paste0( + paste0( + "R_table_eff_data_settings row ",excel_tab_row," - dest: pop ", + dat$Population, + " line ", + dat$Treatment.line, + " mol ", + dat$Molecule, + " trial ", + dat$Origin.trial, + " endp ", + dat$End.point, + " | method: EXCLUDED FROM ANALYSIS BY EXCEL. INCLUDE IN THIS ANALYSIS SET TO NO FOR THIS PLMTE", + "\n" + ) + )) + } + return(prev) + } else { + plmte <- f_misc_get_plmte(prev,d) + plmte$include <- TRUE + } + + # Do a quick check on whether effectiveness data source has gone blank for + # some reason! + if (all( + is.null(plmte$orig$source), + !is.null(dat$Effectiveness.data.source) + )) { + plmte$orig$source <- dat$Effectiveness.data.source + } + + + if (plmte$orig$source == trial_flag) { + + # Simple. extrapolation based directly on PLD. The user in excel has indicated + # that this is one of the reference curves, overriding everything else. + # Origin should be the same as destination. + + d <- list( + pop = paste0("pop_",dat$Population), + line = paste0("line_",dat$Treatment.line), + mol = paste0("mol_",dat$Molecule), + trial = paste0("trial_",dat$Origin.trial), + endpoint = paste0("endpoint_",dat$End.point) + ) + dN <- list( + pop = dat$Population, + line = dat$Treatment.line, + mol = dat$Molecule, + trial = dat$Origin.trial, + endpoint = dat$End.point + ) + + # populate the origin + o <- list( + pop = paste0("pop_",dat$Origin.population), + line = paste0("line_",dat$Origin.line), + mol = paste0("mol_",dat$Origin.treatment), + trial = paste0("trial_",dat$Origin.trial), + endpoint = paste0("endpoint_",dat$Origin.endpoint) + ) + o$dist <- dat$Curve.fit..for.survival.analysis. + o$source <- dat$Effectiveness.data.source + plmte$orig <- o + plmte$dest <- d + plmte$fp <- list() + + # slot the plmte back into the list and return the result: + prev[[d$pop]][[d$line]][[d$mol]][[d$trial]][[d$endpoint]] <- plmte + return(prev) + + + } else if (plmte$orig$source %in% c(phnma_flag, fpnma_flag)) { + + d <- list( + pop = paste0("pop_",dat$Population), + line = paste0("line_",dat$Treatment.line), + mol = paste0("mol_",dat$Molecule), + trial = paste0("trial_",dat$Origin.trial), + endpoint = paste0("endpoint_",dat$End.point) + ) + dN <- list( + pop = dat$Population, + line = dat$Treatment.line, + mol = dat$Molecule, + trial = dat$Origin.trial, + endpoint = dat$End.point + ) + oN <- list( + pop = dat$Origin.population, + line = dat$Origin.line, + mol = dat$Origin.treatment, + trial = dat$Origin.trial, + endpoint = dat$Origin.endpoint + ) + + + + # Now, a check for an error in the excel file: + if (dN$mol == dat$Origin.treatment) { + warning(paste0( + "In row ", + excel_tab_row, + ", origin molecule (", + dN$mol, + ") is set to the SAME as destination molecule (", + dat$Origin.treatment, + ") in Excel. Please fix your human error. I cannot generate s(t)", + " for ", paste(d,collapse = "$"), "." + )) + return(prev) + } + + + if (dat$Effectiveness.data.source == phnma_flag) { + # Match on the phnma table as there's only 1 row per destination + tab_nma_match <- phnma_table[Population == dN$pop & + Line == dN$line & + Molecule == dN$mol & # destination molecule to populate + Reference.treatment== oN$mol & # origin molecule to apply HR to + Endpoint == dN$endpoint,] + } else { + # Match on the "destinations" for the FPNMA to populate across trials: + tab_nma_match <- fpnma_destinations[ Population == dN$pop & + Line == dN$line & + Molecule == dat$Molecule & + Reference.treatment == dat$Origin.treatment & + Reference.trial == oN$trial & + Endpoint == dN$endpoint, ] + } + + # If there is one hazard ratio, put it in: + if (nrow(tab_nma_match) == 1) { + # Take the ORIGIN from the excel table, superseding the linkage from + # the original PHNMA. + # + # EXPLANATION: If the NMA is linking A to B, but the user wants to + # apply the HR linking A to B to extrapolations derived + # from C, then the A vs B HR should be applied to dataset + # C. If it should be applied to reference curve B, then + # the data in the Excel table should have origin columns + # which match with the CODA sample for the NMA. If they + # are different, (e.g. apply HR from trials to RWE-based + # extrapolations per our base case!) then we should + # use the origin from the excel file and apply it to the + # HR that's given from the CODA sample!!! + + plmte$orig$pop <- paste0("pop_",dat$Origin.population) + plmte$orig$line <- paste0("line_",dat$Origin.line) + plmte$orig$mol <- paste0("mol_",dat$Origin.treatment) + plmte$orig$trial <- paste0("trial_",dat$Origin.trial) + plmte$orig$endpoint <- paste0("endpoint_",dat$Origin.endpoint) + plmte$orig$source <- dat$Effectiveness.data.source + + # Take the HR from the PH NMA, irrespective of what its reference trial + # is. Note that the distributional selection DOES NOT MATTER, as + # during propagation the original reference curve will propagate via + # those rows in excel that are set to "Trial survival analysis". See above + # + # e.g. prev$pop_1$line_1$mol_7$trial_0$endpoint_0 should have no HR but should + # have PLD-based extraps. + # + + if(dat$Effectiveness.data.source == phnma_flag) { + plmte$hr <- tab_nma_match$HR + } + + # Now that we've updated the origin for this plmte and have added the HR + # that we want to apply, we can slot the plmte back into prev, and return + # that as we're done with this row. + + prev[[d$pop]][[d$line]][[d$mol]][[d$trial]][[d$endpoint]] <- plmte + return(prev) + + } else if (nrow(tab_nma_match) == 0) { + warning(paste0( + "Row: ", + excel_tab_row, + " Dest: ", + paste(unlist(d), collapse = "$"), + " is using ", + dat$Effectiveness.data.source, + ", but there is no NMA efficacy data which corresponds to it in the CODA sample!" + )) + return(prev) + } + + } else if (dat$Effectiveness.data.source %in% c(et_flag, ahr_flag)) { + + # Either we're applying a HR from the excel table, or we're assuming equal. + # Either way, we need the destination, the plmte and the origin: + d <- list( + pop = paste0("pop_",dat$Population), + line = paste0("line_",dat$Treatment.line), + mol = paste0("mol_",dat$Molecule), + trial = paste0("trial_",dat$Origin.trial), # NOTE THE ASSUMPTION HERE ON trial==origin.trial + endpoint = paste0("endpoint_",dat$End.point) + ) + o <- list( + pop = paste0("pop_",dat$Origin.population), + line = paste0("line_",dat$Origin.line), + mol = paste0("mol_",dat$Origin.treatment), + trial = paste0("trial_",dat$Origin.trial), + endpoint = paste0("endpoint_",dat$Origin.endpoint), + dist = NULL, + source = dat$Effectiveness.data.source + ) + + # override the info that's been put in there for origin and destination + # from the NMAs by the linkages being put in from the excel file. this + # then allows the user to apply relative efficacy estimated by the NMA + # to data not used in the NMA (e.g. apply NMA-based HRs to RWE rather than + # trial PLD) + plmte$orig <- o + plmte$dest <- d + + # If it's a hazard ratio and not assume the same, then put in the HR + if (dat$Effectiveness.data.source != et_flag) { + # Note for PSA we could use rlnorm or such here, including deriving meanlog and sdlog from bounds. + if (psa_flag) { + plmte$hr <- rnorm(1,dat$HR.to.apply,(dat$HR.95..CI..UCL.-dat$HR.to.apply)/1.96) + } else { + plmte$hr <- dat$HR.to.apply + } + } else { + plmte$hr <- 1 + } + + plmte$fp <- list() + + # either way slot in the origin information directly (we don't need anything more) + prev[[d$pop]][[d$line]][[d$mol]][[d$trial]][[d$endpoint]] <- plmte + + return(prev) + } else { + stop(paste0( + "In the effectivness settings in excel, every row with 'Yes'", + " for inclusion MUST have an effectiveness data source that is ", + trial_flag, ", ",phnma_flag,", ",fpnma_flag,", ",et_flag,", or ", ahr_flag, + ". However, pop ", dat$Population.name, " line ", dat$Treatment.line, " molecule ", + dat$Molecule, " endpoint ", dat$End.point.name, " has a value of ",dat$Effectiveness.data.source, + " there! Please find and fix in the table" + )) + } + + } + ) +} + + + +#' Function to implement a fractional polynomial network meta analytic result (time-varying +#' hazard ratio) by multiplying the baseline hazard of the reference curve by the HR_t +#' and then recalculating the survival curve afterwards. +#' +#' @param ref_curve the reference curve to apply the time-varying HRs to +#' @param HR the time-varying hazard ratios to apply +#' +f_FPNMA_implement <- function(ref_curve, HR) { + + # Making sure the two vectors have the same length: + stopifnot(length(ref_curve) == length(HR)) + + # Calculate cumulative h(t), reference baseline h(t), apply HR, cumsum, calculate s(t) + ref_cum_haz <- -log(ref_curve) + ref_baseline_haz <- c(0,diff(ref_cum_haz)) + int_baseline_haz <- ref_baseline_haz * HR + int_cum_haz <- cumsum(int_baseline_haz) + return(exp(-int_cum_haz)) +} + + +#' Function to cycle through all possible pop line mol trial endpoint getting the +#' selected distribution for data-based, using that to apply hazard ratios to populate +#' "destination" entries +#' +#' As the comparative efficacy network provided and the TSD14 survival analysis all +#' follow identical structures (nesting of population line molecule trial endpoint) +#' and naming within those list structures, we can leverage this to essentially +#' replicate network but with an entry (wherever possible) for survival at time t +#' or "st" for short. +#' +#' This uses the "dist" entry in "origin" and "destination" entries to populate +#' those extrapolations that are not informed by either the NMA or assumption. +#' +#' The origin and destination lists for each pop line mol trial endpoint are then used +#' to figure out the distributional selection for "origin" extrapolations (i.e. where +#' the data is coming from). The function then takes the correct +#' distribution from the correct place (the "origin" location in extraps) and +#' applies the correct hazard ratio to it (the HR within the "destination" location) +#' to get the resulting extrapolated survival, to then go into the destination +#' under entry st. +#' +#' In other words, this function gets the right information from the right place +#' and uses it to generate all possible extrapolations given the data that is +#' provided. +#' +#' This function DOES NOT check which treatment pathways can and cannot be +#' simulated given the evidence provided. This is left to another function and +#' this one simply proliferates what can be proliferated! +#' +#' +#' @param network list resulting from f_NMA_generateNetwork, f_NMA_linkPHNMA, and then f_NMA_AddAssumptionsToNetwork +#' @param extraps result of applying f_surv_getExtrapolations to the TSD14 output from f_surv_runAllTSD14 +#' @param dos degrees of separation between original data and extrapolation (e.g. PFS for A (data) to PFS for B (HR1) to PFS for C (HR2) requires 2) +#' @param excel_table named range `R_table_eff_data_settings` from excel, as a `data.table` object +#' @param verbose extra console output if true +#' @param dist_lookups lookup table for distributions +#' @param psa_lambda_flag flag for using lambda approximation to improve efficiency in the PSA. default FALSE +#' @param psa_iteration optional - if `psa_lambda_flag` is TRUE or PSA is true (to be built) it is required. +#' @param th `NULL` except for PSA with lambda, which needs TH to extrapolate the curve should be th not the number of cycles in the model (i.e. that minus one) +#' @param psa_params optional - if `psa_lambda_flag` is TRUE or PSA is true (to be built) it is required. +#' +#' +f_releff_PropNetwork <- + function(network, + extraps, + excel_table, + dos = 5, + verbose = FALSE, + dist_lookups, + psa_lambda_flag = FALSE, + psa_iteration = NULL, + psa_params = NULL, + th = NULL) { + + + dl <- data.table(dist_lookups) + excel_table$xl_rn <- 1:nrow(excel_table) + ref_curves <- excel_table[Include.in.this.analysis. == "Yes" & Effectiveness.data.source == "Trial survival analysis",] + included <- excel_table[Include.in.this.analysis. == "Yes",] + releff <- excel_table[Include.in.this.analysis. == "Yes" & Effectiveness.data.source != "Trial survival analysis",] + + # Add some row numbers for ease of cross referencing things + ref_curves$ref_rn <- 1:nrow(ref_curves) + included$inc_rn <- 1:nrow(included) + releff$rel_rn <- 1:nrow(releff) + + # The reason there are 2 nested reduce statements here is that we need to + # cycle through degrees of separation, for each degree of separation we then + # need to go down the list of those PLMTEs that are included (according to + # excel_table) testing whether or not something can be done at that dos, then + # doing it if possible, leaving it if there's nothing at the origin (i.e. 1 dos away) + + + + # Reduce 1: degree of separation - essentially just repeating the inner reduce + # dos times, but keeping a cumulative result + Reduce( + x = 1:dos, + init = network, + accumulate = FALSE, + f = function(prev_network, deg_of_sep) { + + # The first time, do + # prev_network <- network + # + # to feed in the original network and + # deg_of_sep <- 1 + # + # To do the inner loop, run the inside, then do + # + # prev_network <- prev_network_this + # deg_of_sep <- deg_of_sep + 1 + # + # then run again. + + # It hugely simplifies the degree of separation issue if all of the "starting + # points" are entered into the final network first. this allows relative + # efficacy to fan out from all INITIAL ORIGIN s(t) right from the start + # without having to wait. This hugely simplifies the process at higher + # dos, which is then simply is there a ref curve, if no do nothing, if so + # apply releff. + + if (deg_of_sep == 1) { + prev_network_this <- Reduce( + x = 1:nrow(ref_curves), + init = prev_network, + accumulate = FALSE, + f = function(prev, ref_curve_row) { + + dat <- ref_curves[ref_curve_row,] + + # get the destination so we know where to put the extrapolation + d <- list( + pop = paste0("pop_",dat$Population), + line = paste0("line_",dat$Treatment.line), + mol = paste0("mol_",dat$Molecule), + trial = paste0("trial_",dat$Origin.trial), + endpoint = paste0("endpoint_",dat$End.point) + ) + o <- list( + pop = paste0("pop_",dat$Origin.population), + line = paste0("line_",dat$Origin.line), + mol = paste0("mol_",dat$Origin.treatment), + trial = paste0("trial_",dat$Origin.trial), + endpoint = paste0("endpoint_",dat$Origin.endpoint) + ) + + # pull out that plmte from prev + plmte <- f_misc_get_plmte(prev, d) + + # Make sure excel is determinining the origin, superseding + # if it came from the NMA + plmte$orig[c("pop","line","mol","trial","endpoint")] <- o + + if(verbose) { + cat( + paste0( + "ref curve #", + ref_curve_row, + ", dest: ", + paste(d, collapse = "$"), + " orig: ", + paste(o, collapse = "$"), + "\n" + ) + ) + } + + # pull out the distributional selection from it: + if (is.null(plmte$orig$dist)) plmte$orig$dist <- dat$Curve.fit..for.survival.analysis. + dst <- dl[Description == plmte$orig$dist,]$RCC_input_desc + + if (is.null(plmte$orig$source)) plmte$orig$source <- dat$Effectiveness.data.source + + # check extrapolations available + # If it's the PSA and we're using lambda approximation, then we + # have a THxnPSA matrix here (all exponential), + # otherwise we have a THx7 matrix by dist + if (psa_lambda_flag) { + # When it's the PSA and the lambda approximation method is being used + # then extraps is a set of lambdas, each a fixed rate for that PLMTE. + # Extrapolate that to get curves. + + # in first-line therapy, we preserve the distributional shape + if (d$line == "line_1") { + par <- f_misc_get_plmte(psa_params,d) + dr <- .subset2(par,"draws") + dis <- par$id$dist + if (dis == "exp") { + plmte$st <- f_extrapolate(0:th,dr[psa_iteration],dis) + } else { + plmte$st <- f_extrapolate(0:th,dr[psa_iteration,],dis) + } + } else { + # Otherwise we use the lambda we calculated earlier to remove the + # need for tunnel states for the PSA only. + plmte$st <- f_psa_exp(0:th,f_misc_get_plmte(extraps,o)[psa_iteration]) + } + plmte$populated <- TRUE + prev[[d$pop]][[d$line]][[d$mol]][[d$trial]][[d$endpoint]] <- plmte + return(prev) + } else if (all(class(f_misc_get_plmte(extraps,o)) %in% c("matrix", "array"))) { + plmte$st <- f_misc_get_plmte(extraps,o)[,dst] + plmte$populated <- TRUE + prev[[d$pop]][[d$line]][[d$mol]][[d$trial]][[d$endpoint]] <- plmte + return(prev) + } else { + warning(paste0("ref curve #",ref_curve_row, ", destination: ", paste(d,collapse="$"), " is set to from trial analysis but there's no extrapolations at that location")) + return(prev) + } + } + ) + return(prev_network_this) + } else { + # We've already inserted all the reference curves, so we can fan out + # from each of them one dos at a time. + + # Reduce 2: inclusion table - cycling through all the PLMTEs which are + # included with a "Yes" in the column in excel efficacy settings + # sheet. Each one gets tested for being populated already, then + # method, then the origin is extracted. if no data in the origin + # nothing can be done. if data in the origin then apply the + # correct method. + + # To run line by line: + # + # prev_network <- prev_network_this + # deg_of_sep <- deg_of_sep + 1 + # prev <- prev_network + # + # set the releff_row to be the row in releff you want to apply + # + # Whilst inside, to look up a row (row numbers were added at the beginning): + # + # releff[Population == 0 & Treatment.line == 4 & Molecule == 5 & End.point == 2, ] + # + # Just change the numbers to fit. put that number in releff_row to go + # through that line of the table + # + # releff_row <- releff[Population == 0 & Treatment.line == 2 & Molecule == 0 & End.point == 2, ]$rel_rn + # + + prev_network_this <- Reduce( + # x = 1:63, + x = 1:nrow(releff), + init = prev_network, + accumulate = FALSE, + f = function(prev, releff_row) { + dat <- releff[releff_row, ] + + # Get the destination from the table. Note the assumption on trial + d <- list( + pop = paste0("pop_" , dat$Population), + line = paste0("line_" , dat$Treatment.line), + mol = paste0("mol_" , dat$Molecule), + trial = paste0("trial_" , dat$Origin.trial), + endpoint = paste0("endpoint_", dat$End.point) + ) + + # pull out the PLMTE + plmte <- f_misc_get_plmte(prev, d) + + # Check if done already, if so return prev to avoid repetition + if (all("populated" %in% names(plmte), plmte$populated == TRUE)) return(prev) + + # get information on the origin from plmte, then pull out that data + o <- plmte$orig + orig_data <- f_misc_get_plmte(prev, o) + + # Error check - if the plmte and origin plmte are identical then + # something is wrong (shouldn't ever be the case for non trial data analysis): + if (identical(orig_data,plmte)) { + if (plmte$orig$source == "Assume equal to") { + stop(paste0( + "Circular reference at: ", + paste(unlist(d), collapse = "$"), + " (entry ", dat$xl_rn, ". See Column BQ in Excel effectiveness settings page)", + ". Method is 'Assume equal to' and origin=destination." + )) + } + } + + + # Check if the origin is empty for this destination. shouldn't happen + # much but worth a failsafe: + if (any( + is.null(o$pop), + is.null(o$line), + is.null(o$mol), + is.null(o$trial), + is.null(o$endpoint), + is.null(o$source) + )) { + warning(paste0( + "Entry at ", + paste(d$pop, d$line, d$mol, d$trial, d$endpoint, sep = "$"), + " has missing origin data. This should've been entered via f_NMA_AddAssumptionsToNetwork", + " earlier. Attempting to recover..." + ),immediate. = TRUE) + + # Attempt to recover by getting the settings from the excel table + # by matching up the entries + dN <- list( + pop = dat$Population, + line = dat$Treatment.line, + mol = dat$Molecule, + trial = dat$Origin.trial, + endpoint = dat$End.point + ) + o <- as.list(included[Population == dat$Population & + Treatment.line == dat$Treatment.line & + Molecule == dat$Molecule & + Origin.trial == dat$Origin.trial & + End.point == dat$End.point + , list(Origin.population,Origin.line,Origin.treatment,Origin.trial, Origin.endpoint,Effectiveness.data.source)]) + names(o) <- c("pop","line","mol","trial","endpoint","source") + o$pop <- paste0("pop_",o$pop) + o$line <- paste0("line_",o$line) + o$mol <- paste0("mol_",o$mol) + o$trial <- paste0("trial_",o$trial) + o$endpoint <- paste0("endpoint_",o$endpoint) + + + # slot them into plmte + plmte$orig$pop <- o$pop + plmte$orig$line <- o$line + plmte$orig$mol <- o$mol + plmte$orig$trial <- o$trial + plmte$orig$endpoint <- o$endpoint + plmte$orig$source <- o$source + + # update prev to keep a permanent record of what we've done, + # making sure to only replace those items which exist + prev[[d$pop]][[d$line]][[d$mol]][[d$trial]][[d$endpoint]]$orig$pop <- o$pop + prev[[d$pop]][[d$line]][[d$mol]][[d$trial]][[d$endpoint]]$orig$line <- o$line + prev[[d$pop]][[d$line]][[d$mol]][[d$trial]][[d$endpoint]]$orig$mol <- o$mol + prev[[d$pop]][[d$line]][[d$mol]][[d$trial]][[d$endpoint]]$orig$trial <- o$trial + prev[[d$pop]][[d$line]][[d$mol]][[d$trial]][[d$endpoint]]$orig$endpoint <- o$endpoint + + # get information on the origin from plmte, then pull out that data + o <- plmte$orig + orig_data <- f_misc_get_plmte(prev, o) + } + + # if that data has s(t) in it, we can do something, otherwise we + # can't. + + if (!"populated" %in% names(orig_data)) { + # We can't do anything here yet. If orig is busted we already tried + # to fix it above. + return(prev) + } else { + # there is data in the orig_data to apply relative efficacy to. + # we can apply it + + # limit data to the model time horizon + + orig_data$st <- orig_data$st[1:(p$basic$th+1)] + plmte$fp$HR <- plmte$fp$HR[1:(p$basic$th+1)] + + if (verbose) { + cat( + paste0( + "dos ", + deg_of_sep, + " | releff row: ", + releff_row, + " | excel row: ", + dat$xl_rn, + " | D: ", + paste(d$pop, d$line, d$mol, d$trial, d$endpoint, sep = "$"), + "\t | O: ", + paste(o$pop, o$line, o$mol, o$trial, o$endpoint, sep = "$"), + ": Method = ", + o$source, + "\n" + ) + ) + } + method <- o$source + + # note that if source was missing, we already caught that above + # and tried to fix it. if it's still blank it's blank in excel. + if (is.null(method)) { + warning(paste0( + "dos ", + deg_of_sep, + " | row: ", + releff_row, + " | Destination: ", + paste(d$pop, d$line, d$mol, d$trial, d$endpoint, sep = "$"), + " Origin: ", + paste(o$pop, o$line, o$mol, o$trial, o$endpoint, sep = "$"), + ": o has no method in it, trying to recover..." + ),immediate. = TRUE) + + o$source <- dat$Effectiveness.data.source + method <- dat$Effectiveness.data.source + prev[[d$pop]][[d$line]][[d$mol]][[d$trial]][[d$endpoint]]$orig$source <- dat$Effectiveness.data.source + + if (is.null(o$source)) stop("Failed...row in excel has no method, network has no method. shouldn't happen!") + + return(prev) + } + + # Now we can get on with applying relative efficacy + if (method == "Assume equal to") { + # if assuming equal to, then set equal to, update populated, slot in, done. + plmte$st <- orig_data$st + if (length(plmte$st) > 0) { + plmte$populated <- TRUE + } else { + plmte$populated <- NULL + } + } else if (method %in% c("Apply HR to", "PH_NMA")) { + # simply apply hazard ratio, update, slot in, return. + plmte$st <- orig_data$st ^ plmte$hr + if (length(plmte$st) > 0) { + plmte$populated <- TRUE + } else { + plmte$populated <- NULL + } + } else if (method == "FP_NMA") { + # Apply FP NMA using the function, update, slot in, return. + # To plot: + # plot(orig_data$st,type="l") + # lines(f_FPNMA_implement(orig_data$st, plmte$fp$HR),col="red") + plmte$st <- f_FPNMA_implement(orig_data$st, plmte$fp$HR) + if (length(plmte$st) > 0) { + plmte$populated <- TRUE + } else { + plmte$populated <- NULL + } + } else{ + stop( + paste0( + "Destination ", + paste(unlist(d), collapse = "$"), + " origin ", + paste(unlist(o), collapse = "$"), + " has an invalid dropdown menu selection in the 'Effectiveness data source' column in Excel 'Effectiveness settings' sheet" + ) + ) + } + # slot in the updated PLMTE and return the result. + prev[[d$pop]][[d$line]][[d$mol]][[d$trial]][[d$endpoint]] <- plmte + return(prev) + } + + } + ) + return(prev_network_this) + } + }) +} + + + + + +# Misc functions ---------------------------------------------------------- + +#' Function to add the estimated hazard of two curves. Useful for getting +#' TTD not censoring for death for the partitioned survival model. +f_surv_hazardadd <- function(s_1t, s_2t) { + s_1t * s_2t +} + +#' Calculate probability of dying in each cycle from the survival curve +f_surv_get_q_t <- function(s_t) { + data.table::nafill(1 - (data.table::shift(s_t, type = "lead") / s_t), "locf") +} + +#' Estimate the hazard function from the survival curve +f_surv_get_h_t <- function(s_t, cl_yr = 1/52) { + data.table::nafill((log(s_t) - log(data.table::shift(s_t, type = "lead"))), "locf") / cl_yr +} + + +# Computing extrapolations in CE model ---------------------------------------------------------- + + +#' Function to go into the regression structure generated by f_surv_runAllTSD14 and +#' pull out just the extrapolations (st for survival at time t) for all PLMTEs +#' +#' @param regs regression list resulting from f_surv_runAllTSD14 +#' +f_surv_getExtrapolations <- function(regs) { + lapply(regs, function(popu) { + lapply(popu, function(li) { + lapply(li, function(mol) { + lapply(mol, function(tr) { + lapply(tr, function(plmte) { + plmte$st + }) + }) + }) + }) + }) +} + +#' Function which checks whether a treatment sequence can be estimated given the +#' extrapolations which are currently available +#' +#' @param treatment_sequence a string vector containing the molecule identifiers (e.g. mol_1 mol_4 mol_7 mol_999) +#' @param st calcualted extrapolations with comparative efficacy applied and before transformation into TPs +#' @param lookups vlookup table list from excel (i.e. `i$lookup`) +#' @param pop_n population as a number (0, 1, 2 or 3) +#' +f_seq_extrapCollector <- function(treatment_sequence, st, lookups, pop_n = 0, pop_0_2Lp = TRUE, required_endpoints = paste0("endpoint_",0:4)) { + + len_ts <- length(treatment_sequence) + + # Make the name for the population + risk_pop <- paste0("pop_",pop_n) + + # labels to cycle through for lapply operations for plmte stuff + line_labs <- structure(paste0("line_",1:len_ts),names=paste0("line_",1:len_ts)) + endp_labs <- structure(paste0("endpoint_",0:4),names=paste0("endpoint_",0:4)) + + # bodge error for now, genericisation must allow subgroups at later lines. + if (pop_0_2Lp == FALSE) stop("pop_0_2Lp must be TRUE. Function forces assumption of 2L+ risk population 0. contact developers if you want something else!") + + # Derive which are populated and which aren't, simplifying into tables: + + which_populated <- lapply(line_labs, function(li) { + + if (li == "line_1") { + i_st <- st[[risk_pop]][[li]] + } else { + i_st <- st[["pop_0"]][[li]] + } + + lapply(i_st, function(mol) { + do.call( + rbind, + lapply(mol, function(tr) { + tst <- unlist(lapply(tr, function(plmte) { + "populated" %in% names(plmte) + })) + # Now, every so often not all endpoints make it this far + # so those that aren't there need to get added back in with a FALSE + if (all(required_endpoints %in% names(tr))) { + return(tst) + } else { + # put a FALSE in the place that was missing and then ensure it's + # in the right order + which_names_missing <- required_endpoints[which(!required_endpoints %in% names(tr))] + tst[which_names_missing] <- FALSE + return(tst[required_endpoints]) + } + }) + ) + }) + }) + + # Narrow it down to only those relevant to this pathway: + # shorten the treatment sequence name and derive a numerical version, then trt names! + ts <- treatment_sequence + ts_n <- as.integer(gsub(pattern = "mol_",replacement = "",ts)) + ts_lab <- names(lookups$trt[which(lookups$trt %in% ts_n)]) + + availability_tables <- lapply(1:len_ts, function(trt_line) { + which_populated[[attr(treatment_sequence[trt_line], "names")]][[treatment_sequence[trt_line]]] + }) + names(availability_tables) <- ts_lab + + # Now identify the trial that's recorded for each PLMTE for this TS. + which_trials <- lapply(availability_tables, function(trt_line) { + rn <- rownames(trt_line) + + if (length(rn) == 1) { + temp_vec <- ifelse(trt_line,as.numeric(gsub("trial_","",rn))+1,NA)[rn,] + temp_vec[!is.na(temp_vec)] + } else { + temp_tab <- do.call( + rbind, + lapply(1:nrow(trt_line), function(the_row) { + ifelse(trt_line[the_row,],as.numeric(gsub("trial_","",rownames(trt_line)[the_row]))+1,NA) + }) + ) + unlist(lapply(1:ncol(trt_line), function(the_column) { + temp_tab[which(!is.na(temp_tab[,the_column])),the_column] + })) + } + }) + + # error check - there should only ever be one trial basis for PLMTE. if more than + # one then we need a load of inputs adding to excel and a much more complicated function. + for (tr in 1:len_ts) { + if (any(lapply(which_trials[[tr]],length) > 1)) { + stop( + paste0( + "In treatment sequence ", + paste(treatment_sequence, collapse = " "), + ", Multiple trials available to inform ", + tr, + "L extrapolations! these are ", + which_trials[[tr]], + "...FUNCTION AND EXCEL NEED TO BE UPDATED TO COPE WITH THIS!!!" + ) + ) + } + } + + # pull out the s(t) for each plmte which is to be used for this treatment + # sequence: + + st_ts <- lapply(1:len_ts, function(trt_line) { + pop_txt <- ifelse(trt_line == 1, risk_pop, "pop_0") # later lines assumed risk pop 0 (all patients) + line_txt <- paste0("line_",trt_line) + tr_ids <- names(which_trials[[trt_line]]) + mol_txt <- treatment_sequence[line_txt] + + st_line <- lapply(1:length(tr_ids), function(endp_n) { + tr_txt <- paste0("trial_",which_trials[[trt_line]][endp_n]-1) + endp_txt <- paste0("endpoint_",endp_n-1) + st[[ifelse(trt_line == 1, risk_pop, "pop_0")]][[line_txt]][[mol_txt]][[tr_txt]][[endp_txt]] + }) + names(st_line) <- lookups$ipd$endpoint[match(1:length(tr_ids)-1,Number),]$Description + + return(st_line) + + }) + names(st_ts) <- paste0("line_",1:len_ts) + + # So now we've collected all of the relevant extrapolations for this TS. + # The next step is to figure out which method/assumptions can/should be applied to + # derive transition probabilities between all of the model health states + # for this treatment pathway. However, this will be handled in another function + # as this one is already long. + + return(list( + availability = availability_tables, + trials = which_trials, + st = st_ts + )) +} + + +#' Function to maximize estimated hazards between two curves. +#' +#' takes s_t and the reference s_t to maximize hazards against. +#' +#' As the function with the greatest mortality probability will also have the +#' greatest hazard, uses mortality probabilities as they should be quicker to +#' calculate +f_surv_hazardmax <- function(s_t, reference_s_t) { + th_c <- length(s_t) + h_t_fixed <- pmax(f_surv_get_q_t(s_t)[1:th_c],f_surv_get_q_t(reference_s_t)[1:th_c]) + return(cumprod(1-h_t_fixed)) +} + +#' Hazard minimizer instead of maximizer +f_surv_hazardmin <- function(s_t, reference_s_t) { + th_c <- length(s_t) + h_t_fixed <- pmin(f_surv_get_q_t(s_t)[1:th_c],f_surv_get_q_t(reference_s_t)[1:th_c]) + return(cumprod(1-h_t_fixed)) +} + + + +#' Function to apply general population mortality adjustment to all OS lines +f_surv_gpopadjust <- function(st,gpop, method = "hazardmax", verbose=FALSE) { + + # Cycle through all PLMT's only for endpoint_0, applying hazard max with + # the corresponding gpop OS line + npopu <- names(st) + names(npopu) <- npopu + lapply(npopu, function(this_npopu) { + + nline <- names(st[[this_npopu]]) + names(nline) <- nline + + lapply(nline, function(this_nline) { + + nmol <- names(st[[this_npopu]][[this_nline]]) + names(nmol) <- nmol + + lapply(nmol, function(this_nmol) { + + ntrial <- names(st[[this_npopu]][[this_nline]][[this_nmol]]) + names(ntrial) <- ntrial + + lapply(ntrial, function(this_ntrial) { + + nendp <- names(st[[this_npopu]][[this_nline]][[this_nmol]][[this_ntrial]]) + names(nendp) <- nendp + + lapply(nendp, function(this_nendp) { + + plmte <- st[[this_npopu]][[this_nline]][[this_nmol]][[this_ntrial]][[this_nendp]] + + # If there's no survival extrapolation here, then there's nothing to do + if (!"st" %in% names(plmte)) { + return(plmte) + } else { + # Basically, if it's empty already, then there's nothing to do! + if (is.null(plmte$dest)) { + if (verbose) cat(paste0( + "This PLMTE has no destination, doing nothing. (plmte = ", + paste(c(this_npopu,this_nline,this_nmol,this_ntrial,this_nendp), collapse = "$"), + ")\n" + )) + return(plmte) + } + + d <- plmte$dest + if (verbose) cat(paste0("Gpop adj: ",paste(unlist(d),collapse = " | "),"\n")) + gp <- gpop[[d$pop]][[d$line]]$os + if (method == "hazardmax") { + plmte$st <- f_surv_hazardmax(plmte$st, gp) + } else { + plmte$st <- pmin(plmte$st, gp) + } + + if (length(plmte$st) == 1) { + plmte$st <- NULL + return(plmte) + } else { + plmte$gpop_adjusted <- TRUE + plmte$gpop_method <- method + return(plmte) + } + } + }) + }) + }) + }) + }) +} + +#' hazard max/abs min for PFS vs OS to prevent curve crossing +f_surv_PFSxOS <- function(st,method = "hazardmax") { + + # Cycle through all PLMT's only for endpoint_0, applying hazard max with + # the corresponding gpop OS line + + Reduce( + x = 1, + init = st, + accumulate = FALSE, + f = function(prev, dos) { + + lapply(st, function(popu) { + lapply(popu, function(li) { + lapply(li, function(mol) { + lapply(mol, function(plmt) { + + OS <- plmt$endpoint_0 + PFS <- plmt$endpoint_1 + + # If there's no extrapolations to adjust then don't. + if (!"st" %in% names(OS)) { + return(plmt) + } + + if (method == "hazardmax") { + PFS$st <- f_surv_hazardmax(PFS$st, OS$st) + } else { + PFS$st <- pmin(PFS$st, OS$st) + } + PFS$curve_cross <- TRUE + PFS$curve_cross_method <- method + plmt$endpoint_1 <- PFS + + # Now that we updated the OS for this PLMT, we can just return it + return(plmt) + }) + }) + }) + }) + } + ) +} + + +#' hazard min/abs max for TTD vs PFS to prevent curve crossing. TTP should be +#' the same as or above PFS at all times, since it censors for death and PFS +#' includes death as an event. It should be impossible for TTP to be below PFS +#' +#' Also note that as TTP (and TTD) ignore death events, they can be above OS +#' +f_surv_PFSxTTP <- function(st, method = "hazardmax") { + # Cycle through all PLMT's only for endpoint_0, applying hazard max with + # the corresponding gpop OS line + + lapply(st, function(popu) { + lapply(popu, function(li) { + lapply(li, function(mol) { + lapply(mol, function(plmt) { + PFS <- .subset2(plmt, "endpoint_1") + TTP <- .subset2(plmt, "endpoint_3") + if (!"st" %in% names(PFS)) { + return(plmt) + } + if (method == "hazardmax") { + TTP$st <- f_surv_hazardmin(PFS$st, TTP$st) + } else if (method == "abs") { + TTP$st <- pmax(PFS$st, TTP$st) + } else { + stop("the argument 'method' should be 'hazards' or 'abs'") + } + PFS$curve_cross <- TRUE + PFS$curve_cross_method <- method + plmt$endpoint_3 <- TTP + + # Now that we updated the OS for this PLMT, we can just return it + return(plmt) + }) + }) + }) + }) +} + +#' Although time to discontinuation can have lower hazard than PFS, it can't +#' have lower hazard than OS, since you can't be dead but on treatment. +f_surv_TTDxOS <- function(st, method = "hazardmax") { + + # Cycle through all PLMT's only for endpoint_0, applying hazard max with + # the corresponding gpop OS line + + return(Reduce( + x = 1, + init = st, + accumulate = FALSE, + f = function(prev, dos) { + + lapply(st, function(popu) { + lapply(popu, function(li) { + lapply(li, function(mol) { + lapply(mol, function(plmt) { + + OS <- .subset2(plmt,"endpoint_0") + TTD <- .subset2(plmt,"endpoint_2") + + if (!"st" %in% names(OS)) { + return(plmt) + } + + if (method == "hazardmax") { + TTD$st <- f_surv_hazardmax(TTD$st,OS$st) + } else if (method == "abs") { + TTD$st <- pmin(TTD$st,OS$st) + } else { + stop("the argument 'method' should be 'hazards' or 'abs'") + } + TTD$curve_cross <- TRUE + TTD$curve_cross_method <- method + plmt$endpoint_2 <- TTD + + # Now that we updated the OS for this PLMT, we can just return it + return(plmt) + }) + }) + }) + }) + } + )) +} + +f_surv_adjuvant_HR <- function(st,adjuvant_impact,demo_table, lookup, verbose = FALSE) { + + # Cycle through all PLMT's only for endpoint_0, applying hazard ratio associated with + # adjuvant treatment + + adjuvant_impact <- as.data.table(adjuvant_impact) + + # Make a multi-id, which can id entries inside of st in two different ways: + # + # - by number for risk population (0, 1, 2) + # - by "label" for risk population (pop_0, pop_1, pop_2) + # + # The reason for doing this is that the table adjuvant_impact has the numbers + # but the output list needs to have the pop_0 type labelling to match the rest + # of the PLMTE relational database type system we've set up. + # + pop_names <- names(st) + pop_id <- as.numeric(gsub("pop_","",names(st))) + names(pop_id) <- names(st) + + + # Cycle through pop_id, using the numbers or the labels to pull the right + # set of data: + + lapply(pop_id, function(popu) { + + # simplify things by pulling out the label, position and number as separate + # things to keep everything simple in the code that follows. + p_pos <- which(pop_id %in% popu) + p_lab <- names(pop_id)[p_pos] + p_num <- as.numeric(popu) + + # Use the lookup table to also pull up the name of the risk population + # as used within Excel: + p_nam <- lookup$ipd$pop[Number == p_num,]$Description + + # now, do the same thing by treatment line, we only need numbers and + # labels this time though: + line_names <- names(st[[p_lab]]) + line_id <- as.numeric(gsub("line_", "", line_names)) + names(line_id) <- line_names + + # Cycle through the line ids. We still need numbers to filter the demo table and + # ids to name the list: + lapply(line_id, function(li) { + + # Number for this line. we need the label for getting the names in the next + # level: + l_num <- as.numeric(li) + l_pos <- which(line_id %in% l_num) + l_lab <- names(line_id)[l_pos] + + # Note that the demo table in excel uses p_nam and NOT the number! + prop_adj <- demo_table[Treatment.line == l_num & Population == p_nam]$Prior.IO...in.12.months.Mean + + mol_names <- names(st[[p_lab]][[l_lab]]) + mol_id <- as.numeric(gsub("mol_", "", mol_names)) + names(mol_id) <- mol_names + + lapply(mol_id, function(mol) { + + # now number position label and name for molecule: + m_num <- as.numeric(gsub("mol_", "", mol)) + m_pos <- which(mol_id %in% m_num) + m_lab <- names(mol_id)[m_pos] + + # The table in excel named range R_table_prior_IO_impact_eff uses + # the treatment name rather than molecule number, so we need to lookup + # the name + m_nam <- lookup$ipd$mol[Number == m_num,]$RCC_input_desc + + t_labs <- names(st[[p_lab]][[l_lab]][[m_lab]]) + t_nums <- as.numeric(gsub("trial_", "", t_labs)) + + # Make the cycling index for trials. the function structure + # lets you make the values and the names in one go. + trial_id <- structure(t_nums,.Names=t_labs) + + lapply(trial_id, function(tr) { + + # Make an id set for the trial being considered: + t_num <- as.numeric(gsub("trial_", "", tr)) + t_pos <- which(trial_id %in% t_num) + t_lab <- names(trial_id)[t_pos] + + # calculate the impact of prior adjuvant therapy (HR assumed 1 for patients with no prior adjuvant) + impact_adj <- prop_adj * adjuvant_impact[Treatments == m_nam,]$Prior.adj.impact_HR_mean + (1 - prop_adj) + + # The impact is applied like a hazard ratio to the element "st" inside + # of each PLMTE. Therefore, we need to cycle through each endpoint + # one at a time, checking whether element st even exists, then applying + # the hazard ratio to it if it does. + + # cycle through this plmt, one endpoint at a time, applying HR if + # it's not 1 and s(t) exists and is finite. + plmt <- lapply(st[[p_lab]][[l_lab]][[m_lab]][[t_lab]], function(plmte) { + + if ("st" %in% names(plmte)) { + if (all(is.finite(plmte$st), is.finite(impact_adj))) { #, impact_adj != 1 + + if (verbose) { + f_misc_colcat( + paste0( + "Applying adjuvant HRs | ", + paste(p_lab,l_lab,m_lab,t_lab), + " | HR=", + impact_adj + ) + ) + } + + # If the entry st (survival at time t) exists in this PLMTE and + # it is finite (i.e. not NA or something) then apply the HR to it + # otherwise don't. Obviously if the HR is 1 then computation is + # unecessary. + plmte$st <- plmte$st ^ impact_adj + + # Now that we're done editing this PLMTE, we return the updated + # version: + return(plmte) + } + } else { + # there's nothing to do because there's no entry in this PLMTE + # for st (survival at time t), so there's nothing to apply a HR to + return(plmte) + } + }) + # Now that we've edited this plmt, we can return the updated result + return(plmt) + }) + }) + }) + }) + +} + + +# Stopping rules ---------------------------------------------------------- + +#' Function to apply stopping rules by directly modifying TTD to make it 0 at +#' the start of the given cycle (e.g. if 104 weeks given, it should be 0 from the +#' 105th week onwards, i.e. patients get treated in week 103 starting from 0, but +#' then not from 104 onwards) +#' +f_surv_apply_stopping_rules <- function(st, tot_table, lookups) { + + # filter down the table to just those with a max TxDur in cycles that have Yes for inclusion: + txDur <- tot_table[Treatment.given.for.fixed.duration...Yes.No. == "Yes" & If.yes..number.of.cycles != 0 ,] + + # Cycle down the table using the Reduce method, only changing TTD in corresponding + # DESTINATION PLMTs. We don't have TOT lines, so TTD is the only thing we can + # manipulate here. However, it may be that we have to create TOT for the PS model + # applying these. + + Reduce( + x = 1:nrow(txDur), + init = st, + accumulate = FALSE, + f = function(prev, txDur_row) { + + dat <- as.list(txDur[txDur_row,]) + + # Round the max treatment duration up to the next cycle. In reality these + # should all be integer values for. The +1 is for cycle 0 as it will stop + # treatment at the start of the cycle given. + max_txDur <- ceiling(dat$If.yes..number.of.cycles) + 1 + + # the destination is by population line and molecule only across all trials + # and endpoints. Therefore, PLM rather than PLMTE + d <- list( + pop = paste0("pop_",lookups$ipd$pop[Description == dat$Population,"Number"]), + line = paste0("line_",dat$Treatment.line), + mol = paste0("mol_",dat$Molecule) + ) + + # Cycle through all relevant trials for the plm, applying the stopping rule to the + # TTD line in each by setting all TTD values at or beyond the stopping rule + # time to 0 + + plm_updated <- lapply(prev[[d$pop]][[d$line]][[d$mol]], function(plmt) { + + # For this plmt, check if TTD is populated or not. if not, then just return it + # unchanged, if so, apply the stopping rule and return the result. + + if ("populated" %in% names(plmt$endpoint_2)) { + + # Apply the stopping rule at the nth cycle (taking 0 into account) + plmt$endpoint_2$st[max_txDur:length(plmt$endpoint_2$st)] <- 0 + plmt$endpoint_2$stopping_rule <- TRUE + plmt$endpoint_2$stopping_rule_cl <- max_txDur + + return(plmt) + } else { + return(plmt) + } + }) + + # Now simply inject the updated plm into prev, and return the updated list, + # allowing us to then apply the next row of the Excel table in the next + # go round. + prev[[d$pop]][[d$line]][[d$mol]] <- plm_updated + return(prev) + } + ) + +} + + + +# QC functions ------------------------------------------------------------ + +# plots to look at abs survival and estimated hazard + +f_qc_surv_ExtrapPlot <- function(st,popu,li,mo,tr,t_yr,th) { + + endpoints <- structure( + .Data = c("endpoint_0", "endpoint_1", "endpoint_2", "endpoint_3"), + .Names = c("OS", "PFS", "TTD", "TTP") + ) + + p_dat <- rbindlist(lapply(1:length(endpoints), function(endp) { + + if (length(st[[popu]][[li]][[mo]][[tr]][[endpoints[endp]]]$st[1]) > 0) { + data.table(t_yr = t_yr, + s_t = st[[popu]][[li]][[mo]][[tr]][[endpoints[endp]]]$st, + endp = names(endpoints[endp])) + } + })) + + if(is.null(p_dat)) { + return(NULL) + } else { + ggplot(p_dat, aes(x = t_yr, y = s_t, colour = endp)) + + geom_line() + + theme_classic() + + theme(legend.position = "bottom", legend.title=element_blank()) + + labs(title = NULL, x = "Time (years)", y = "% Survival") + + scale_x_continuous(expand = expansion(mult = c(0,0.05))) + + scale_y_continuous(labels = scales::percent) + } +} + + + +f_qc_surv_gethead <- function(x, p,l,m,t,len = 10) { + do.call(cbind,lapply(x[[paste0("pop_",p)]][[paste0("line_",l)]][[paste0("mol_",m)]][[paste0("trial_",t)]], function(endp) { + head(endp$st,len) + }) + ) +} + +f_qc_lookupplm <- function(tab, p,l,m) { + tab[Population == p & Treatment.line == l & Molecule == m, ] +} +f_qc_lookupplm_e <- function(tab, p,l,m,e) { + tab[Population == p & Treatment.line == l & Molecule == m & End.point == e, ] +} + + +f_qc_surv_EstHazPlot <- function(st,gpop,popu,li,mo,tr,t_yr,th) { + endpoints <- structure( + .Data = c("endpoint_0", "endpoint_1", "endpoint_2", "endpoint_3"), + .Names = c("OS", "PFS", "TTD", "TTP") + ) + + p_dat <- do.call(rbind,lapply(1:length(endpoints), function(endp) { + if (!is.na(st[[popu]][[li]][[mo]][[tr]][[endpoints[endp]]]$st[1])) { + data.table(t_yr = t_yr, + est_h_t = f_surv_get_h_t(st[[popu]][[li]][[mo]][[tr]][[endpoints[endp]]]$st, t_yr[2]), + endp = names(endpoints[endp])) + } + })) + + p_dat <- rbind(p_dat,data.table(t_yr = t_yr,est_h_t = f_surv_get_h_t(gpop[[popu]][[li]]$os, t_yr[2]),endp="gpop")) + if(nrow(p_dat) == length(t_yr)) { + return(NULL) + } else { + ggplot(p_dat, aes(x = t_yr, y = est_h_t, colour = endp)) + + geom_line() + + theme_classic() + + theme(legend.position = "bottom", legend.title=element_blank()) + + labs(title = NULL, x = "Time (years)", y = "Transition probability") + + scale_x_continuous(expand = expansion(mult = c(0,0.05))) + } +} + + + diff --git a/3_Functions/survival/other_cause_mortality.R b/3_Functions/survival/other_cause_mortality.R new file mode 100644 index 0000000..4b02d5a --- /dev/null +++ b/3_Functions/survival/other_cause_mortality.R @@ -0,0 +1,567 @@ +#' Functions to adjust for general population mortality +#' + +#' Convenience function to pick the right table out of what was loaded from +#' Excel and rename the columns +get_lifetables <- function(i) { + stopifnot("R_table_mort_lifeTable" %in% names(i)) + + res <- i$R_table_mort_lifeTable + + names(res) <- c("x", "q_male", "q_female") + + res <- rbind(res, data.frame(x = 101, q_male = 1, q_female = 1)) + + res +} + +#' Fast function to get gpop OS line, optionally returns full matrix for QC +#' or just vector of age and sex adjusted OS line for gpop mortality. +f_surv_getgpopOSFast <- function(bl_mal, bl_age, t_yr, lt, full_output = FALSE) { + + # The cycle length is always the 2nd element of t_yr, as its always 1 time step :) + cl_yr <- t_yr[2] + + # Make vector of age + age_t <- pmin(floor(bl_age + t_yr),100) + + # Get vector of qx given age for m and f + lt_aligned <- lt[match(age_t,age..x.),] + + # Calculate hazard rates (time units are still years) + lt_aligned[, `:=`(h_ma = -log(1-qx..males.), h_fe = -log(1-qx..females.))] + + # Calculate per-cycle mortality probabilities + lt_aligned[, `:=`(qt_ma = 1 - (1-qx..males.)^cl_yr, qt_fe = 1 - (1-qx..females.)^cl_yr)] + + # Calculate overall survival + lt_aligned[, `:=`( + os_ma = cumprod(exp(-lag(h_ma, 1, 0)*cl_yr)), + os_fe = cumprod(exp(-lag(h_fe, 1, 0)*cl_yr)) + )] + + # Compute age and sex adjusted OS line + lt_aligned[, s_t := bl_mal * os_ma + (1 - bl_mal) * os_fe] + + # calculate balance of male to female for weighting + lt_aligned[, w_ma := nafill((os_ma * bl_mal) / s_t, "locf")] + + # Calculate weighted hazard + lt_aligned[, h_w := (h_ma * w_ma) + (h_fe * (1-w_ma))] + + + # Return matrix + + if (full_output) { + return( + as.matrix( + lt_aligned[, .(age=age..x.,qx_m_orig=qx..males.,qx_f_orig=qx..females.,qx_m_target=qt_ma,qx_f_target=qt_fe,os_ma=os_ma,os_fe=os_fe,w_ma=w_ma,h_w=h_w,gpop_os=s_t)] + ) + ) + } else { + return(lt_aligned$s_t) + } + +} + +# Testing for it: +# f_surv_getgpopOSFast( +# bl_mal = 0.5, +# bl_age = 50, +# t_yr = p$basic$t_yr, +# lt = data.table(i$R_table_mort_lifeTable), +# full_output = FALSE +# ) + +#' Calculate general population survival curves for IPD data on age/sex/line +#' +#' @param R_table_patientagesex IPD including columns Sex ("M"/"F"), Age (numeric) and Line (1-4) +#' @param R_table_mort_lifeTable the named range R_table_mort_lifeTable from the excel input sheet +#' @param t_yr vector of time in years at each cycle, stored in p$basic$t_yr +#' @param lookups list of lookups +f_surv_GenOSLines_ipd <- function(R_table_patientagesex, R_table_mort_lifeTable, t_yr, lookups) { + + # Calculate a matrix with survival curves across columns where every row + # corresponds to a row in R_table_patientagesex + all_genpop_os <- t(mapply( + FUN = f_surv_getgpopOSFast, + bl_mal = 1 * (R_table_patientagesex$Gender == "M"), + bl_age = R_table_patientagesex$Age, + MoreArgs = list(t_yr = p$basic$t_yr, lt = data.table(R_table_mort_lifeTable)) + )) + + # Calculate the mean survival in each cycle within each line + lines_genpop_os <- lapply(unique(R_table_patientagesex$Line), function(li) + colMeans(all_genpop_os[R_table_patientagesex$Line == li,])) + + # Prepare the data structure to match what is produced by f_surv_GenOSLines_det + pop_ref <- paste0("pop_", lookups$ipd$pop$Number) + names(pop_ref) <- pop_ref + line_nums <- unique(R_table_patientagesex$Line) + names(line_nums) <- paste0("line_", line_nums) + + # Create the output + lapply(pop_ref, function(popu) { + lapply(line_nums, function(lin) { + list( + d = list(pop = popu, line = paste0("line_", lin)), + os = lines_genpop_os[[lin]] + ) + }) + }) +} + +#' Deterministic gen pop OS line - one per row of the table in Excel (R_table_ptchar) +#' +#' @param R_table_ptchar the named range R_table_ptchar from the excel input sheet +#' @param R_table_mort_lifeTable the named range R_table_mort_lifeTable from the excel input sheet +#' @param t_yr vector of time in years at each cycle, stored in p$basic$t_yr +#' @param lookups list of lookups +#' +f_surv_GenOSLines_det <- function(R_table_ptchar, R_table_mort_lifeTable, t_yr, lookups) { + + n_pops <- nrow(R_table_ptchar) + + pop_ref <- paste0("pop_",lookups$ipd$pop$Number[match(unique(R_table_ptchar$Population),lookups$ipd$pop$Description)]) + names(pop_ref) <- pop_ref + + line_ref <- paste0("line_",unique(R_table_ptchar$Treatment.line)) + names(line_ref) <- line_ref + + # Make an empty structure using the labelling above (consistent with + # labelling from excel and R model) + empty_structure <- lapply(pop_ref, function(popu) { + lapply(line_ref, function(lin) { + list( + d = list(), + os = NULL + ) + }) + }) + + # Generate flat list containing our OS lines and some information on destination + os_list <- lapply(1:n_pops, function(ptchar_row) { + + # Note that in 1st line it needs to divide up into 6 different populations + # for prior IO and no prior IO + + dat <- as.list(R_table_ptchar[ptchar_row, ]) + + return(list( + d = list( + pop = paste0("pop_", lookups$ipd$pop[match(dat$Population, Description), ]$Number), + line = paste0("line_", dat$Treatment.line) + ), + os = f_surv_getgpopOSFast( + bl_mal = 1 - dat$Starting...female.Mean, + bl_age = dat$Starting.age..years..Mean, + t_yr = t_yr, + lt = data.table(R_table_mort_lifeTable), + full_output = FALSE + ) + )) + + }) + + # Dynamically slot the stuff from the flat list into the structured list: + return(Reduce( + x = 1:length(os_list), + init = empty_structure, + accumulate = FALSE, + f = function(prev, excel_row) { + + dat <- os_list[[excel_row]] + d <- dat$d + pl <- prev[[d$pop]][[d$line]] + pl$d <- d + pl$os <- dat$os + prev[[d$pop]][[d$line]] <- pl + + return(prev) + } + )) +} + + + + + + +#' Adjust a single survival curve for general population mortality +#' +#' This function will ensure that the rate of mortality does not drop below the +#' rate of general population mortality. +#' +#' @param base_age Age when t = 0 (at model start) +#' @param cycle_length Length of cycle (in years, e.g., 1/52) +#' @param v_t_os Vector of times for overall survival (t = 0 at model +#' start) +#' @param v_p_os Vector of overall survival probability, i.e., +#' `v_p_os[i]` gives the probability of surviving to at +#' least `v_t_os[i]` +#' @param v_x_lifetable Vector of ages for life table +#' @param v_q_lifetable Vector of life table per-year mortality risk values, +#' i.e., `v_q_lifetable[i]` gives the probability that +#' somebody who reaches their `v_x_lifetable[i]`th birthday +#' will die before reaching their next birthday +#' @param .warn If TRUE (default) will issue a warning detailing (if +#' applicable) when the life table gives a lower risk of +#' mortality than the overall survival curve +#' +#' @return A `data.frame` containing five columns: `t` (which will equal +#' `v_t_os`), `q_genmort` (the per-cycle *probability* of general +#' population mortality), `q_adjusted` (the per-cycle *probability* of +#' death following adjustment to ensure the rate of mortality does not +#' fall below the general population mortality), `s_genmort` (the +#' *survival curve* if only general population mortality applied), and +#' `s_adjusted` (the *survival curve* following adjustment) +adjust_single_survival <- function( + base_age, + cycle_length, + v_t_os, + v_p_os, + v_x_lifetable, + v_q_lifetable, + .warn = TRUE +) { + + # Transform v_q_lifetable to give hazard rates + v_r_lifetable <- -log(1 - v_q_lifetable) + + # Transform v_p_os to also give hazard rates (note that v_r_os will be one + # element shorter than v_p_os) + n <- length(v_t_os) + v_r_os <- (log(v_p_os[1:(n-1)]) - log(v_p_os[2:n])) / cycle_length + + # Line v_r_lifetable up with v_r_os + v_x_aligned <- v_t_os[1:(n-1)] + base_age + v_r_lifetable_aligned <- approx( + x = v_x_lifetable, + y = v_r_lifetable, + xout = v_x_aligned, + method = "constant" + )$y + + # Check if v_r_os goes below v_r_lifetable_aligned at any point + if (.warn & any(v_r_os < v_r_lifetable_aligned)) { + warning_msg <- paste( + "Mortality rate from life table exceeds extrapolated mortality at time", + v_t_os[which.max(v_r_os < v_r_lifetable_aligned)] + ) + warning(warning_msg) + } + + # Stitch together to get combined hazard rate vector + v_r_combined <- pmax(v_r_os, v_r_lifetable_aligned) + + # Convert to per-cycle probabilities of avoiding death + v_p_genmort <- exp(-v_r_lifetable_aligned * cycle_length) + v_p_combined <- exp(-v_r_combined * cycle_length) + + # Convert to survival + v_p_os_genmort <- Reduce( + f = `*`, + x = v_p_genmort, + init = 1, + accumulate = TRUE + ) + v_p_os_adjusted <- Reduce( + f = `*`, + x = v_p_combined, + init = 1, + accumulate = TRUE + ) + + # Return + data.frame( + t = v_t_os, + q_genmort = c(1 - v_p_genmort, NA_real_), + q_adjusted = c(1 - v_p_combined, NA_real_), + s_genmort = v_p_os_genmort, + s_adjusted = v_p_os_adjusted + ) + +} + +#' Adjust a survival curve using patient-level data on age and sex +#' +#' @param pts A `data.frame` with two columns, `age` (numeric) and `sex` (see +#' Details). Has one row for each patient. +#' @param s_os The survival curve that needs adjusting (calculated for each +#' cycle in the model) +#' @param .i A list containing inputs which have been loaded from the Excel +#' inputs file. If this is not provided then the function will look +#' for `i` in the global environment and stop with an error if it is +#' not found. +#' @param .p A list containing "cultivated" model parameters. If this is not +#' provided then the function will look for `p` in the global +#' environment and stop with an error if it is not found. +#' @param .warn If TRUE (default) will issue a warning detailing (if +#' applicable) when the life table gives a lower risk of +#' mortality than the overall survival curve +#' +#' @details The sex of participants can be specified in a number of ways. +#' Option 1: Character with "M" or "m" for men and "F" or "f" for women. +#' Option 2: Factor which coerces to characters consistent with Option 1. +#' Option 3: Integer or Boolean vector with 0/FALSE representing female +#' and 1/TRUE representing male. +#' +#' @return A `data.frame` containing seven columns: `t`, `q_genmort` (the per- +#' cycle *probability* of general population mortality), `q_adjusted` +#' (the per-cycle *probability* of death following adjustment to ensure +#' the rate of mortality does not fall below the general population +#' mortality), `s_genmort` (the *survival curve* if only general +#' population mortality applied), `s_adjusted` (the *survival curve* +#' following adjustment), `prop_male.genmort` (the proportion of the +#' remaining cohort which would be male if only general population +#' mortality applied), and `prop_male.adjusted` (the proportion of the +#' remaining cohort which are male after OS is adjusted for general +#' population mortality). +adjust_survival_individuals <- function(pts, s_os, .i = NULL, .p = NULL, .warn = TRUE) { + i <- if (is.null(.i)) get("i", envir = globalenv()) else .i + p <- if (is.null(.p)) get("p", envir = globalenv()) else .p + + age <- pts$age + sex <- pts$sex + + lifetables <- get_lifetables(i) + + if (is.factor(sex)) sex <- as.character(sex) + + if (is.character(sex)) { + sex <- toupper(sex) + stopifnot(all(sex %in% c("M", "F"))) + sex <- (sex == "M") + } + + if (is.numeric(sex)) stopifnot(all(sex %in% c(0, 1))) + + # Generate individual survival curves + individual_curves <- mapply( + FUN = function(age, sex, ...) { + adjust_single_survival( + base_age = age, + v_q_lifetable = if (sex) lifetables$q_male else lifetables$q_female, + ... + ) + }, + age = age, + sex = sex, + MoreArgs = list( + cycle_length = p$basic$cl_y, + v_t_os = p$basic$cl_y * (1:length(s_os) - 1), + v_p_os = s_os, + v_x_lifetable = lifetables$x, + .warn = .warn + ), + SIMPLIFY = FALSE + ) + + # Combine the survival curves + # + # Details: + # - Combined survival is simply the mean survival across individual survival + # curves + # - Combined rate and per-cycle probability of death are calculated from the + # individual rates and probabilities after adjusting for survival + t_combined <- individual_curves[[1]]$t + s_adjusted_combined <- apply( + X = do.call(cbind, lapply(individual_curves, `[[`, "s_adjusted")), + MARGIN = 1, + FUN = mean + ) + s_genmort_combined <- apply( + X = do.call(cbind, lapply(individual_curves, `[[`, "s_genmort")), + MARGIN = 1, + FUN = mean + ) + q_genmort_combined <- apply( + X = do.call(cbind, lapply(individual_curves, function(x) x$q_genmort * x$s_adjusted)), + MARGIN = 1, + FUN = mean + ) / s_adjusted_combined + q_adjusted_combined <- apply( + X = do.call(cbind, lapply(individual_curves, function(x) x$q_adjusted * x$s_adjusted)), + MARGIN = 1, + FUN = mean + ) / s_adjusted_combined + + prop_male_genmort <- apply( + X = mapply(function(.c, .s) .s * .c$s_genmort, .c = individual_curves, .s = sex), + MARGIN = 1, + FUN = mean + ) / s_genmort_combined + prop_male_combined <- apply( + X = mapply(function(.c, .s) .s * .c$s_adjusted, .c = individual_curves, .s = sex), + MARGIN = 1, + FUN = mean + ) / s_adjusted_combined + + # Return + data.frame( + t = t_combined, + q_genmort = q_genmort_combined, + q_adjusted = q_adjusted_combined, + s_genmort = s_genmort_combined, + s_adjusted = s_adjusted_combined, + prop_male.genmort = prop_male_genmort, + prop_male.adjusted = prop_male_combined + ) + +} + + +#' Adjust a survival curve +#' +#' @param sex Either a number between 0 and 1 giving the proportion +#' of patients who are male, or a vector of patient sex in +#' the form specified in `adjust_survival_individuals` +#' @param age Either a number giving the mean age of the population, +#' or a vector of individual patient ages which aligns +#' with `sex` +#' @param survivor The survivor function, with one observation per 1-week +#' model cycle. This is expected to be either a 2D array/ +#' matrix with two columns or a `data.frame` with two +#' columns. +#' @param .patient_level If `TRUE` will force the use of +#' `adjust_survival_individuals`. If not specified (or +#' specified as `NULL` the function will attempt to +#' determine whether patient level or aggregate data has +#' been supplied) +#' @param .i A list containing inputs which have been loaded from +#' the Excel inputs file. If this is not provided then the +#' function will look for `i` in the global environment +#' and stop with an error if it is not found. +#' @param .p A list containing "cultivated" model parameters. If +#' this is not provided then the function will look for +#' `p` in the global environment and stop with an error if +#' it is not found. +#' @param .warn If TRUE (default) will issue a warning detailing (if +#' applicable) when the life table gives a lower risk of +#' mortality than the overall survival curve +#' +#' @return A `data.frame` containing seven columns: `t`, `q_genmort` (the per- +#' cycle *probability* of general population mortality), `q_adjusted` +#' (the per-cycle *probability* of death following adjustment to ensure +#' the rate of mortality does not fall below the general population +#' mortality), `s_genmort` (the *survival curve* if only general +#' population mortality applied), `s_adjusted` (the *survival curve* +#' following adjustment), `prop_male.genmort` (the proportion of the +#' remaining cohort which would be male if only general population +#' mortality applied), and `prop_male.adjusted` (the proportion of the +#' remaining cohort which are male after OS is adjusted for general +#' population mortality). +adjust_survival <- function(sex, age, survivor, .patient_level = NULL, + .i = NULL, .p = NULL, .warn = TRUE) { + + # Check that sex and age are conformable + stopifnot(length(sex) == length(age)) + + # If .patient_level is not specified, infer it + if (is.null(.patient_level)) .patient_level <- (length(sex) > 1) + + # Extract v_survivor (the vector of only survival probabilities) + v_survivor <- if (is.list(survivor)) survivor[[2]] else survivor[,2] + + if (.patient_level) { + + return( + adjust_survival_individuals( + pts = data.frame(sex = sex, age = age), + s_os = v_survivor, + .i = .i, + .p = .p, + .warn = .warn + ) + ) + + } else { + + m <- adjust_survival_individuals( + pts = data.frame(sex = "M", age = age), + s_os = v_survivor, + .i = .i, + .p = .p, + .warn = .warn + ) + f <- adjust_survival_individuals( + pts = data.frame(sex = "F", age = age), + s_os = v_survivor, + .i = .i, + .p = .p, + .warn = .warn + ) + + t_combined <- m$t + + s_genmort_combined <- sex * m$s_genmort + (1 - sex) * f$s_genmort + s_adjusted_combined <- sex * m$s_adjusted + (1 - sex) * f$s_adjusted + + q_genmort_combined <- ( + sex * m$q_genmort * m$s_adjusted + (1 - sex) * f$q_genmort * f$s_adjusted + ) / s_adjusted_combined + + q_adjusted_combined <- ( + sex * m$q_adjusted * m$s_adjusted + (1 - sex) * f$q_adjusted * f$s_adjusted + ) / s_adjusted_combined + + prop_male_genmort <- (sex * m$s_genmort) / s_genmort_combined + prop_male_combined <- (sex * m$s_adjusted) / s_adjusted_combined + + return(data.frame( + t = t_combined, + q_genmort = q_genmort_combined, + q_adjusted = q_adjusted_combined, + s_genmort = s_genmort_combined, + s_adjusted = s_adjusted_combined, + prop_male.genmort = prop_male_genmort, + prop_male.adjusted = prop_male_combined + )) + + } +} + + + +# testing ground ---------------------------------------------------------- + +if (FALSE) { + # Make plot for output: + gp <- get_elem(p$surv$gpop,"os") + + gp <- rbindlist(list( + pop_0 = data.table( + population = p$basic$lookup$ipd$pop[match(0,p$basic$lookup$ipd$pop$Number),]$Description, + gpop_os = gp$pop_0$line_1, + t = p$basic$t_yr + ), + pop_1 = data.table( + population = p$basic$lookup$ipd$pop[match(1,p$basic$lookup$ipd$pop$Number),]$Description, + gpop_os = gp$pop_1, + t = p$basic$t_yr + ), + pop_2 = data.table( + population = p$basic$lookup$ipd$pop[match(2,p$basic$lookup$ipd$pop$Number),]$Description, + gpop_os = gp$pop_2, + t = p$basic$t_yr + ) + )) + gpop_pop_plot <- ggplot(gp, aes(x = t, y = gpop_os, colour = population)) + + geom_line() + + theme_classic() + + theme(legend.position = "bottom", legend.title=element_blank()) + + labs(title = NULL, x = "Time (years)", y = "% Survival") + + scale_x_continuous(expand = expansion(mult = c(0,0.05))) + + scale_y_continuous(labels = scales::percent) + + ggsave( + filename = file.path("./4_Output","gpop_plot_pop_level.png"), + plot = gpop_pop_plot, + device = "png", + units = "cm", + width = 15 + ) + + +} + diff --git a/3_Functions/survival/treatment_effect_waning.R b/3_Functions/survival/treatment_effect_waning.R new file mode 100644 index 0000000..841e81d --- /dev/null +++ b/3_Functions/survival/treatment_effect_waning.R @@ -0,0 +1,422 @@ +#' Apply treatment effect waning +#' +#' @param surv_active A survivor function as a vector giving the probability of +#' having not experienced the event of interest at the start +#' of each cycle +#' @param surv_ref A reference survivor function - as the treatment effect +#' wanes, the hazard function will switch to the one given +#' by this curve +#' @param start_cycle The cycle in which the treatment effect begins to wane +#' @param finish_cycle The cycle in which the treatment effect finishes waning +#' @param apply_waning Change this to FALSE to bypass waning altogether +#' @param wcc With the default value (0.5), the proportion of treatment +#' effect which remains is based on the mid-point of the +#' cycle. Set `wcc = 0` to base it on the start of the cycle +#' or `wcc = 1` for the end of the cycle. +#' +#' @details The hazard rate in the survivor function adjusted for treatment +#' effect waning is assumed to be $r h_1(t_i) + (1-r) h_0(t_i)$, where $r$ is +#' proportion of treatment effect remaining, $h_1$ is the active treatment +#' hazard rate and $h_0$ is the reference treatment hazard rate. $r = 1$ for +#' any cycle earlier than `start_cycle` and $r = 0$ for `finish_cycle` and +#' subsequent cycles. For cycles from `start_cycle` and earlier than +#' `finish_cycle`, linear interpolation is used based on the mid-point of the +#' cycle assuming that $r = 1$ at the start of `start_cycle` and $r = 0$ at +#' the start of `finish_cycle`. If `start_cycle` and `finish_cycle` are the +#' same, then there is no interpolation as the effect wanes immediately. +#' NOTE: The first cycle is numbered 0. +treatment_effect_waning <- function( + surv_active, surv_ref, start_cycle, finish_cycle, apply_waning = TRUE, + wcc = 0.5 + ) { + + if (apply_waning == FALSE) return(surv_active) + + n <- length(surv_active) + + if (n < start_cycle) return(surv_active) + + # Check inputs are valid + stopifnot(start_cycle <= finish_cycle) + stopifnot(length(surv_active) == length(surv_ref)) + + res <- numeric(n) + + # Copy survival curve exactly before `start_cycle` + res[1:(start_cycle-1)] <- surv_active[1:(start_cycle-1)] + + # Calculate per cycle hazard + h_active <- log(surv_active[1:(n-1)]) - log(surv_active[2:n]) + h_ref <- log(surv_ref[1:(n-1)]) - log(surv_ref[2:n]) + + if (any(h_ref < h_active)) { + total_warnings <- sum(h_ref < h_active) + cycle_first_warning <- which.max(h_ref < h_active) + warning_msg <- paste( + "Hazard rate in the active treatment is more than the hazard rate in the", + "reference treatment in", total_warnings, "cycles, the first of which is", + (cycle_first_warning - 1) + ) + warning(warning_msg) + } + + # Calculate waning + t <- seq_along(surv_active) - 1 + wcc + r <- if (start_cycle < finish_cycle) + approx( + x = c(0, start_cycle, finish_cycle, Inf), + y = c(1, 1, 0, 0), + xout = t[1:(n-1)] + )$y + else + rep(c(1, 0), times = c(start_cycle, n - start_cycle - 1)) + + # Calculate adjusted hazards + h_adj <- r * h_active + (1 - r) * h_ref + p_adj <- exp(-h_adj) + + # Produce adjusted survivor function + s_adj <- Reduce( + f = `*`, + x = p_adj, + init = 1, + accumulate = TRUE + ) + + res[start_cycle:n] <- s_adj[start_cycle:n] + + res + +} + + +#' Apply treatment effect waning MODIFIED TO INCLUDE ABSOLUTE WANING +#' +#' @param surv_active A survivor function as a vector giving the probability of +#' having not experienced the event of interest at the start +#' of each cycle +#' @param surv_ref A reference survivor function - as the treatment effect +#' wanes, the hazard function will switch to the one given +#' by this curve +#' @param start_cycle The cycle in which the treatment effect begins to wane +#' @param finish_cycle The cycle in which the treatment effect finishes waning +#' @param apply_waning Change this to FALSE to bypass waning altogether +#' @param wcc With the default value (0.5), the proportion of treatment +#' effect which remains is based on the mid-point of the +#' cycle. Set `wcc = 0` to base it on the start of the cycle +#' or `wcc = 1` for the end of the cycle. +#' @param method Apply treatment effect waning to the hazards (default) +#' or the absolute survival at time t (sometimes used in +#' cost-effectiveness models) +#' +#' @details The hazard rate in the survivor function adjusted for treatment +#' effect waning is assumed to be $r h_1(t_i) + (1-r) h_0(t_i)$, where $r$ is +#' proportion of treatment effect remaining, $h_1$ is the active treatment +#' hazard rate and $h_0$ is the reference treatment hazard rate. $r = 1$ for +#' any cycle earlier than `start_cycle` and $r = 0$ for `finish_cycle` and +#' subsequent cycles. For cycles from `start_cycle` and earlier than +#' `finish_cycle`, linear interpolation is used based on the mid-point of the +#' cycle assuming that $r = 1$ at the start of `start_cycle` and $r = 0$ at +#' the start of `finish_cycle`. If `start_cycle` and `finish_cycle` are the +#' same, then there is no interpolation as the effect wanes immediately. +#' NOTE: The first cycle is numbered 0. +treatment_effect_waning_with_absolute <- function( + surv_active, surv_ref, start_cycle, finish_cycle, apply_waning = TRUE, + wcc = 0.5, method = "h", if_cross_use_worst = TRUE + ) { + + # method must be h or a (hazard or absolute) + stopifnot(method %in% c("h", "a")) + + if (apply_waning == FALSE) return(surv_active) + + n <- length(surv_active) + + if (n < start_cycle) return(surv_active) + + # Check inputs are valid + stopifnot(start_cycle <= finish_cycle) + if(length(surv_active) != length(surv_ref)) { + if (length(surv_active) > length(surv_ref)) { + new_index <- (length(surv_ref)+1):length(surv_active) + surv_ref[new_index] <- surv_active[new_index] + } else { + new_index <- (length(surv_active)+1):length(surv_ref) + surv_active[new_index] <- surv_ref[new_index] + } + } + + res <- numeric(n) + + # Copy survival curve exactly before `start_cycle` + res[1:(start_cycle-1)] <- surv_active[1:(start_cycle-1)] + + # If the method is a then the operation is very simple - simply interpolate + # absolute survival from the active curve to the reference curve using the + # start and end values + + if (method == "a") { + + # Instant waning - people instantly die when the treatment effect starts to + # disappears all at once... + if (start_cycle == finish_cycle) { + res[start_cycle:length(res)] <- pmin( + surv_ref[start_cycle:length(res)], + surv_active[start_cycle:length(res)] + ) + return(res) + } else { + # Gradual waning but on absolute survival - linearly interpolate absolute + # survival between the two curves and return the result. Perform interpolation + # take the values we need, use them to populate the rest of the vector. + stopifnot(start_cycle < finish_cycle) + t <- seq_along(surv_active) - 1 + wcc + w_t <- approx( + x = c(0, start_cycle, finish_cycle, Inf), + y = c(1, 1, 0, 0), + xout = t[1:(n-1)] + )$y[start_cycle:n] + + if (if_cross_use_worst) { + surv_ref <- pmin(surv_active,surv_ref) + } + + res[start_cycle:n] <- + (surv_active[start_cycle:n] * (w_t)) + + (surv_ref[start_cycle:n] * (1-w_t)) + return(res) + } + + } else if (method == "h") { + + + # Calculate per cycle hazard + h_active <- log(surv_active[1:(n-1)]) - log(surv_active[2:n]) + h_ref <- log(surv_ref[1:(n-1)]) - log(surv_ref[2:n]) + + if (any(h_ref < h_active)) { + total_warnings <- sum(h_ref < h_active) + cycle_first_warning <- which.max(h_ref < h_active) + warning_msg <- paste( + "Hazard rate in the active treatment is more than the hazard rate in the", + "reference treatment in", + total_warnings, + "cycles, the first of which is", + (cycle_first_warning - 1) + ) + if (if_cross_use_worst) { + warning_msg <- paste0(warning_msg," . Using the higher of the two hazards where they cross!") + h_ref <- pmax(h_ref, h_active) + } + warning(warning_msg, immediate. = TRUE) + } + + # Calculate waning + t <- seq_along(surv_active) - 1 + wcc + r <- if (start_cycle < finish_cycle) { + approx( + x = c(0, start_cycle, finish_cycle, Inf), + y = c(1, 1, 0, 0), + xout = t[1:(n-1)] + )$y + } else { + rep(c(1, 0), times = c(start_cycle, n - start_cycle - 1)) + } + + # Calculate adjusted hazards + h_adj <- r * h_active + (1 - r) * h_ref + p_adj <- exp(-h_adj) + + # Produce adjusted survivor function + s_adj <- cumprod(c(1,p_adj)) + + # QC NOTE for TS: the above is identical to the below commented out lines + # s_adj <- Reduce( + # f = `*`, + # x = p_adj, + # init = 1, + # accumulate = TRUE + # ) + + res[start_cycle:n] <- s_adj[start_cycle:n] + + return(res) + } else { + stop("Method should be h or a - this error shouldn't be possible!") + } +} + + +# convenience functions --------------------------------------------------- + +#' Translator function to return the argument needed for the waning function on method +#' using the dropdown option from excel +#' +#' @param m either `ref trt hazard` or `ref trt abs surv` according to named range `apply_waning_to` in Excel +#' +f_misc_twaning_methodTranslate <- Vectorize(function(m) { + switch (m, + "ref trt hazard" = return("h"), + "ref trt abs surv" = return("a"), + "0" = NA + ) +},USE.NAMES = FALSE) + + + +# Function to apply in the cost-effectiveness model ----------------------- + +#' Function to apply treatment effect waning to all PLMTEs according to the table +#' provided in the excel inputs, under named range "R_table_TE_waning_settings". +#' +#' @param st_list the list of all extrapolations AFTER propagation of relative efficacy, i.e. `p$surv$st` +#' @param tab_waning named range `R_table_TE_waning_settings` from excel +#' @param tab_eff_set named range `R_table_eff_data_settings` from excel +#' +#' +f_surv_twaning_apply <- function(st_list, tab_waning, tab_eff_set, verbose = FALSE) { + + # abbreviate the waning table and translate the method: + t_w <- data.table(tab_waning) + t_w$row_orig <- 1:nrow(t_w) + t_e <- data.table(tab_eff_set) + + # Tidy up the excel table, filtering it down to just the yes: + t_w$method <- f_misc_twaning_methodTranslate(t_w$apply.to) + t_w <- t_w[!is.na(apply.to) & apply.waning == "Yes",] + t_w$row_func <- 1:nrow(t_w) + + # filter the efficacy table down to just destinations and origins, as that's + # all we need it for (d for destination o for origin): + d_o <- t_e[,list( + Include.in.this.analysis., + Population, + Treatment.line, + Molecule, + End.point, + Origin.population, + Origin.line, + Origin.treatment, + Origin.trial, + Origin.endpoint + )] + + # Cycle down the rows of t_w, updating st_list each time using d_o to give us + # the location of the reference curve and t_w to give us the location of the + # active curve to change in st_list + + return(Reduce( + x = 1:nrow(t_w), + init = st_list, + accumulate = FALSE, + f = function(prev, tw_row) { + + tw <- t_w[tw_row,] + + dN <- list( + pop = tw$Population, + line = tw$Treatment.line, + mol = tw$Treatment, + endpoint = tw$End.point + ) + + # Use the numeric destination id's to filter down d_o to the destination + # (Note that there's only one for each dest, and it's not sensitive to trial) + omatch <- d_o[Population == dN$pop & Treatment.line == dN$line & Molecule == dN$mol & End.point == dN$endpoint,] + + # if this curve isn't included in the analysis then nope + if (omatch$Include.in.this.analysis. == "No") return(prev) + + # Use the reference table to populate the destination trial (for completeness and QC) + dN$trial = omatch$Origin.trial + + # Use this information to derive the location within st_list for the + # active survival to be "twaned", and then the reference curve to use: + d <- list( + pop = paste0("pop_",dN$pop), + line = paste0("line_",dN$line), + mol = paste0("mol_",dN$mol), + trial = paste0("trial_",dN$trial), + endpoint = paste0("endpoint_",dN$endpoint) + ) + o <- list( + pop = paste0("pop_",omatch$Origin.population), + line = paste0("line_",omatch$Origin.line), + mol = paste0("mol_",omatch$Origin.treatment), + trial = paste0("trial_",omatch$Origin.trial), + endpoint = paste0("endpoint_",omatch$Origin.endpoint) + ) + + # if verbose, then return a nice message: + if (verbose) { + f_misc_colcat(paste0( + "TxWaning #",tw$row_orig," (",tw_row,"). d=.$", + paste(unlist(d),collapse = "$"), + " | o=.$", + paste(unlist(o),collapse = "$"), + " | m=", + tw$apply.to, + " | s=", + tw$start, + " | e=", + tw$end + )) + } + + # Get our plmte to update: + plmte <- f_misc_get_plmte(st_list,d) + + # Get the active curve and the reference curve + ref_curve <- f_misc_get_plmte(st_list,o)$st + + stopifnot(!is.null(ref_curve)) + + # perform treatment effect waning adjustment on the active curve, replacing + # the element "st" in plmte with the result + plmte$st <- treatment_effect_waning_with_absolute( + surv_active = plmte$st, + surv_ref = ref_curve, + start_cycle = tw$start, + finish_cycle = tw$end, + method = tw$method) + + # slot the plmte back into prev, now that it has been udpated, and return + prev[[d$pop]][[d$line]][[d$mol]][[d$trial]][[d$endpoint]] <- plmte + return(prev) + } + )) +} + + +# Testing ground: --------------------------------------------------------- + +if (FALSE) { + + # Run the model until you populate p$surv$st (i.e. the propagator). good idea + # to then output that to a file so you can play with it without having to + # rerun everything each time. + p <- list() + p$surv <- list() + p$surv$st <- readRDS("./1_Data/example_st.rds") + + # Example inputs: + st_act <- p$surv$st$pop_0$line_1$mol_1$trial_2$endpoint_0$st + st_ref <- p$surv$st$pop_0$line_1$mol_7$trial_2$endpoint_0$st + wan_st <- i$R_table_TE_waning_settings[Population == 0 & Treatment.line == 1 & Treatment == 1 & End.point == 0,]$start + wan_end <- i$R_table_TE_waning_settings[Population == 0 & Treatment.line == 1 & Treatment == 1 & End.point == 0,]$end + method<- "h" + + st_wan <- treatment_effect_waning_with_absolute( + surv_active = st_act, + surv_ref = st_ref, + start_cycle = wan_st, + finish_cycle = wan_end, + apply_waning = TRUE, + method = method) + + plot(st_wan, type="l") + lines(st_act, col="green") + lines(st_ref, col="red") + +} + diff --git a/3_Functions/utility/age_related.R b/3_Functions/utility/age_related.R new file mode 100644 index 0000000..fc03f9b --- /dev/null +++ b/3_Functions/utility/age_related.R @@ -0,0 +1,441 @@ +#' Functions to assist with creating age-related utility values (QALY weights) +#' +#' These are based on the model fits described (in limited detail) in Hernandez +#' Alava (2022) and more detailed model description in Hernandez Alava (2012). +#' +#' The statistical model is a mixture model but with added floor and ceiling +#' effects. +#' +#' REFERENCES +#' +#' Hernandez Alava M, Pudney S, Wailoo A. Estimating EQ-5D by age and sex for +#' the UK. NICE DSU Report. 2022. +#' Hernandez Alava M, Wailoo AJ, Ara R. Tails from the Peak District: Adjusted +#' Limited Dependent Variable Mixture Models of EQ-5D Questionnaire Health +#' State Utility Values. Value in Health 2012; 15(3):550-561. + +#' Pull the HSE 2014 models from Excel +#' +#' @param sex Choose from "female" and "male" to load the relevant model. +#' @param psa If `psa == TRUE` then instead of returning the maximum +#' likelihood parameter values, it will instead sample +#' from the MLE distribution. +#' @param .i Provides ability to inject a list `.i` which will be used +#' to obtain the tables instead of a full extract from Excel. +#' If this is not provided the function will take `i` from +#' the global environment. +extract_utility_ageadjust_coefs <- function( + sex = c("female", "male"), psa = FALSE, .i = NULL + ) { + + if (is.null(.i)) .i <- get("i", envir = globalenv()) + + sex <- match.arg(sex) + + tbl <- .i[[paste0("R_table_ageadjust_util_", sex)]] + + v_covariate_nm <- c("Age/10", "(Age/10)^2", "intercept") + v_mix_nm <- c("Age/10", "intercept") + + # Extract maximum likelihood estimates ---- + + tbl_coef <- if (sex == "female") tbl$Females.Coefficient else tbl$Males.Coefficient + + l_coefficients <- list( + # Component 1 + setNames(tbl_coef[2:4], v_covariate_nm), + + # Component 2 + setNames(tbl_coef[5:7], v_covariate_nm), + + # Component 3 + setNames(tbl_coef[8:10], v_covariate_nm) + ) + + l_mix_coef <- list( + # Component 1 + setNames(tbl_coef[11:12], v_mix_nm), + + # Component 2 + setNames(tbl_coef[13:14], v_mix_nm) + ) + + v_sigma <- tbl_coef[15:17] + + # Sample from MLE distribution if PSA ---- + + if (psa) { + + stopifnot(requireNamespace("MASS")) + + # Note that the MLE values of sigma have been transformed into + # the linear scale but the covariance matrix is for ln(sigma) + # + # As a result, we set the mean of the multivariate normal + # distribution to zero for these elements, and then we + # use the results as scale factors for the sigma values. + + mu <- c( + unlist(l_coefficients), + unlist(l_mix_coef), + rep(0, length.out = length(v_sigma)) + ) + + Sigma <- unname( + as.matrix( + as.data.frame( + lapply(tbl[2:17, 4:19], as.numeric) + ) + ) + ) + + x <- MASS::mvrnorm(n = 1, mu = mu, Sigma = Sigma) + + l_coefficients[[1]][1:3] <- x[1:3] + l_coefficients[[2]][1:3] <- x[4:6] + l_coefficients[[3]][1:3] <- x[7:9] + + l_mix_coef[[1]][1:2] <- x[10:11] + l_mix_coef[[2]][1:2] <- x[12:13] + + v_sigma <- v_sigma * exp(x[14:16]) + + + } + + # Return ---- + + list( + l_coefficients = l_coefficients, + l_mix_coef = l_mix_coef, + v_sigma = v_sigma + ) +} + +#' Add parameters for calculating EQ-5D population norms to a parameter list `p` +#' +#' @param p An existing parameter list to which parameters will be added +#' @param psa If `FALSE` then maximum likelihood estimates for parameters will +#' be added, while if `TRUE` values will be sampled from the MLE +#' asymptotic distribution +#' @param .i Provides ability to inject a list `.i` which will be used +#' to obtain the tables instead of a full extract from Excel. +#' If this is not provided the function will take `i` from +#' the global environment. +#' @returns A *new* list (because R will do copy-on-modify) with the +#' parameters added/replaced +add_population_utility_params <- function(p, psa = FALSE, .i = NULL) { + p$util$pop_norms <- list( + female = extract_utility_ageadjust_coefs("female", psa, .i), + male = extract_utility_ageadjust_coefs("male", psa, .i) + ) + return(p) +} + +#' Calculate the utility value for a single component of the ALDVMM model +#' +#' @param covariates Either a vector of patient covariates or a matrix (or +#' something coercible to a matrix) where each row gives the +#' covariates for a single patient (e.g. each row could +#' correspond to different ages) +#' @param coefficients A vector of coefficients which determine the mean of the +#' untruncated distribution. This must have the same length +#' as `covariates` if `covariates` is a vector or the same +#' length as the number of columns of `covariates`. +#' @param sigma The standard deviation of the untruncated distribution. +#' @param upper_limit The upper limit at which a ceiling effect is observed. +#' @param upper_to What happens to values greater than `upper_limit`. +#' @param lower_limit The lower limit at which a floor effect is observed. +#' @param lower_to What happens to values less than `lower_limit`. +#' @param type Either "mean", in which case the mean utility is +#' returned, or "sampled", in which case a sample or samples +#' from the distribution will be returned. +#' @param .n The number of samples (if greater than 1) when +#' `type == "sampled"`. This will be the number of samples +#' per patient if there is more than one patient given in +#' `covariates`, in which case different samples will be +#' returned in different columns. +utility_single_component <- function( + covariates, + coefficients, + sigma, + upper_limit = 0.883, + upper_to = 1.0, + lower_limit = -0.594, + lower_to = lower_limit, + type = c("mean", "sampled"), + .n = NULL + ) { + + type <- match.arg(type) + + ym <- if (is.null(dim(covariates))) sum(covariates * coefficients) else rowSums(as.matrix(covariates) %*% coefficients) + + if (type == "mean") { + + # Calculate mass on `upper_to` + w_upper <- pnorm(upper_limit, ym, sigma, lower.tail = FALSE) + + # Calculate mass on `lower_to` + w_lower <- pnorm(lower_limit, ym, sigma, lower.tail = TRUE) + + # Remaining mass + w_rem <- 1 - w_upper - w_lower + + # Calculate mean of remaining mass + # Note - If the location parameter of the truncated distribution is outside + # the truncation limits, this can result in significant numerical + # issues. We can replace with `truncnorm` package if this occurs, but + # for now I want to avoid requiring too many packages. + alpha <- (lower_limit - ym) / sigma + beta <- (upper_limit - ym) / sigma + mu_rem <- ym - sigma * (dnorm(beta) - dnorm(alpha)) / (pnorm(beta) - pnorm(alpha)) + + # Combine all components + return(w_upper * upper_to + w_lower * lower_to + w_rem * mu_rem) + + } else if (type == "sampled") { + + n_pt <- if (is.null(dim(covariates))) 1 else nrow(covariates) + + if (is.null(.n)) { + ys <- rnorm(n_pt, ym, sd = sigma) + + if (ys > upper_limit) ys <- upper_to + if (ys < lower_limit) ys <- lower_to + + return(ys) + } else { + ys <- array(rnorm(.n * n_pt, ym, sd = sigma), dim = c(n_pt, .n)) + + ys[ys > upper_limit] <- upper_to + ys[ys < lower_limit] <- lower_to + + return(ys) + } + } + +} + +#' Calculate the utility value for a full ALDVMM model +#' +#' @param covariates A vector of patient covariates. +#' @param l_coefficients A list of vectors of coefficients which determine the +#' mean of the untruncated distributions for the +#' components. Each vector of coefficients must have the +#' same length as `covariates`. +#' @param v_sigma The standard deviations of the untruncated +#' distributions. +#' @param l_mix_coef A list of vectors of coefficients which determine the +#' mixture weights using a multinomial logit approach. +#' Note that `l_mix_coef` should have one less component +#' than `l_coefficients` and `sigma` because the final +#' component is assumed to have linear predictor = 0. +#' @param upper_limit The upper limit at which a ceiling effect is observed. +#' @param upper_to What happens to values greater than `upper_limit`. +#' @param lower_limit The lower limit at which a floor effect is observed. +#' @param lower_to What happens to values less than `lower_limit`. +#' @param type Either "mean", in which case the mean utility is +#' returned, or "sampled", in which case a sample or +#' samples from the distribution will be returned. +#' @param .n The number of samples (if greater than 1) when +#' `type == "sampled"`. +utility_mixture <- function( + covariates, + l_coefficients, + v_sigma, + l_mix_coef, + upper_limit = 0.883, + upper_to = 1.0, + lower_limit = -0.594, + lower_to = lower_limit, + type = c("mean", "sampled"), + .n = NULL + ) { + + type <- match.arg(type) + + stopifnot(is.list(l_coefficients)) + stopifnot(length(l_coefficients) == length(v_sigma)) + stopifnot(length(l_coefficients) == length(l_mix_coef) + 1) + + n_c <- length(l_coefficients) + + if (is.null(dim(covariates))) { + mix_eta <- sapply(l_mix_coef, function(mix_coef) sum(covariates * mix_coef)) + mix_wt <- c(exp(mix_eta), 1) + mix_wt <- mix_wt / sum(mix_wt) + } else { + mix_eta <- sapply(l_mix_coef, function(mix_coef) rowSums(as.matrix(covariates) %*% mix_coef)) + mix_wt <- cbind(exp(mix_eta), 1) + mix_wt <- sweep(mix_wt, 1, rowSums(mix_wt), "/") + } + + if (type == "mean") { + + component_utility <- mapply( + utility_single_component, + l_coefficients, + v_sigma, + MoreArgs = list( + covariates = covariates, + upper_limit = upper_limit, + upper_to = upper_to, + lower_limit = lower_limit, + lower_to = lower_to, + type = type + ) + ) + + return(if (is.null(dim(covariates))) sum(mix_wt * component_utility) else rowSums(mix_wt * component_utility)) + + } else if (type == "sampled") { + + if (is.null(.n)) { + + cls <- which(rmultinom(1, 1, mix_wt) == 1) + + return( + utility_single_component( + covariates, + l_coefficients[[cls]], + v_sigma[cls], + upper_limit, + upper_to, + lower_limit, + lower_to, + "sampled" + ) + ) + + } else { + + cls <- rmultinom(1, .n, mix_wt) + + u_sampled <- mapply( + utility_single_component, + coefficients = l_coefficients, + sigma = v_sigma, + .n = cls, + MoreArgs = list( + covariates = covariates, + upper_limit = upper_limit, + upper_to = upper_to, + lower_limit = lower_limit, + lower_to = lower_to, + type = type + ), + SIMPLIFY = FALSE + ) + + u_sampled <- unlist(u_sampled) + + return(sample(u_sampled, size = length(u_sampled))) + + } + + } + +} + +#' Calculate the utility for the general population for given age(s) and sex +#' +#' @param age The age at which to evaluate the general population utility (the +#' function is vectorised over `age`) +#' @param sex The sex for which to evaluate general population utility (the +#' function is *NOT* vectorised over `sex`) +#' @param .p Allows for injection of `p` instead of looking for it in the +#' global environment. `p` is assumed to have the parameters for the +#' female and male ALDVMMs. +utility_genpop <- function(age, sex = c("female", "male"), .p = NULL) { + + if (is.null(.p)) .p <- get("p", envir = globalenv()) + + # Fetch the appropriate model from .p + model <- .p$util$pop_norms[[sex]] + + # l_mix_coef doesn't use the (age/10)^2 but we need to put it in otherwise + # we cannot use a single covar + + l_mix_coef <- lapply(model$l_mix_coef, function(mc) { + c(mc[1], `(Age/10)^2` = 0, mc[2]) + }) + + covar <- cbind(0.1 * age, 0.01 * age ^ 2, 1.0) + + utility_mixture( + covariates = covar, + l_coefficients = model$l_coefficients, + v_sigma = model$v_sigma, + l_mix_coef = l_mix_coef + ) + +} + +#' Adjust utilities for population norms by age and sex +#' +#' This function operates by applying population norms according to age +#' and sex, assuming that in the first cycle no adjustment is required +#' (i.e., it is already consistent with population norms) and making +#' subsequent adjustments to continue to reflect population norms. +#' +#' @param age Either a vector of ages (if supplying individual +#' patient data) or the mean age for a cohort. +#' @param sex Either a vector of sexes (which can be a +#' character vector with "male" and "female"), or +#' the proportion of the cohort which is male. +#' @param utilities The utilities to be adjusted: a list whose 2nd +#' is the unadjusted utility value for each cycle, +#' or a `data.frame` whose 2nd column is the un- +#' adjusted utility. +#' @param .patient_level Logical. If TRUE, will treat `age` and `sex` as +#' vectors with individual patient data which are +#' aligned. If FALSE, will assume that `age` gives +#' the mean age, and `sex` gives the proportion of +#' the cohort which are male. If not provided, the +#' function will attempt to infer whether cohort +#' or individual patient data has been provided. +#' @param .p Allows for injection of `p` instead of looking +#' for it in the global environment. +adjust_utility <- function(age, sex, utilities, .patient_level = NULL, .p = NULL) { + + # Check that sex and age are conformable + stopifnot(length(age) == length(sex)) + + # If .patient_level is not specified, infer it + if (is.null(.patient_level)) .patient_level <- (length(sex) > 1) + + # If .p was not injected, look for p in the global environment + if (is.null(.p)) .p <- get("p", envir = globalenv()) + + # Extract v_utilities + v_utilities <- if (is.list(utilities)) utilities[[2]] else utilities[,2] + + if (.patient_level) { + + ages <- lapply(age, function(age) age + .p$basic$cl_y * (seq_along(v_utilities) - 1)) + + a_genpop <- mapply( + FUN = utility_genpop, + age = ages, + sex = sex, + MoreArgs = list(.p = .p), + SIMPLIFY = TRUE + ) + + v_genpop <- apply(a_genpop, MARGIN = 1, FUN = mean) + + return((v_genpop / v_genpop[1]) * v_utilities) + + } else { + + ages <- age + .p$basic$cl_y * (seq_along(v_utilities) - 1) + + v_genpop_m <- utility_genpop(ages, "male", .p = .p) + v_genpop_f <- utility_genpop(ages, "female", .p = .p) + v_genpop <- sex * v_genpop_m + (1 - sex) * v_genpop_f + + return((v_genpop / v_genpop[1]) * v_utilities) + + } +} diff --git a/4_Output/Readme.Rmd b/4_Output/Readme.Rmd new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/4_Output/Readme.Rmd @@ -0,0 +1 @@ + diff --git a/ID6184 Using the R decision model (EAG instructions) noACIC.docx b/ID6184 Using the R decision model (EAG instructions) noACIC.docx new file mode 100644 index 0000000..98a25b9 Binary files /dev/null and b/ID6184 Using the R decision model (EAG instructions) noACIC.docx differ diff --git a/NICE-model-repo.Rproj b/NICE-model-repo.Rproj new file mode 100644 index 0000000..8e3c2eb --- /dev/null +++ b/NICE-model-repo.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX diff --git a/README.md b/README.md new file mode 100644 index 0000000..77baaad --- /dev/null +++ b/README.md @@ -0,0 +1,25 @@ +# Exeter Oncology Model: RCC edition produced as part of NICE's pathways pilot + +The Exeter Oncology Model: RCC edition is a platform cost-effectiveness model encompassing each decision node in the disease area for advanced renal cell carcinoma. + +This model has been created as part of a NICE pilot aimed both at reducing the long-term resource requirements for appraisal within crowded treatment pathways and at assessing the feasibility of incorporating treatment sequences into decision making. + +The Exeter Oncology Model: RCC edition has been constructed as part of pathways pilot appraisal ID6186 and the appraisal of cabozantinib plus nivolumab ID6184. No data is contained in the code. All input data is contained in the data folder, dummy data is provided where the data used in the appraisal was marked as confidential. A user interface was originally planned be added to this model at a later stage in the project. + +The folder structure loosely follows ZIN guidelines: + + 1. Data: externally derived parameters should be saved here + 2. Scripts - one working script is provided (model_structure) which contains the following sections: + i. Installation - containing all information to make the model operational. This section also states the version of R and the packages used at the time of submission. + ii. Loading functions + iii. Model inputs including: loading of input parameters, conduct of survival analysis, application of relative effectiveness and formatting of inputs for costs and quality of life + iv. Population settings + v. Patient flow: this is where the patient flow is produced dependent on the selected model structure and results are produced and compiled + 3. Functions + 4. Output (intermediate and final output data can be saved to here) + + A tests section had been planned for a later phase of the project. + + Instructions on how to use the model can be found in document: ID6184 Using the R decision model (EAG instructions) noACIC + + If you re-use this model please appropriately credit PenTAG for the work and refer to it as the Exeter Oncology Model: RCC edition; Authors: Dawn Lee, Madhusubramanian Muthukumar, Alan Lovell, Caroline Farmer, Darren Burns, Justin Matthews, Helen Coelho, Brian O’Toole, Laura A Trigg, Tristan Snowsill, Maxwell S Barnish, Thalia Nikoglou, Amanda Brand, Zain Ahmad, Ahmed Abdelsabour, Sophie Robinson, Edward CF Wilson, G.J. Melendez-Torres. diff --git a/tests/testthat/test-age_related.R b/tests/testthat/test-age_related.R new file mode 100644 index 0000000..8c1b7b4 --- /dev/null +++ b/tests/testthat/test-age_related.R @@ -0,0 +1,196 @@ +require(here) +source(here::here("3_Functions", "utility", "age_related.R")) + +# When the tables are brought in from Excel they are in a bad way... +.i <- list( + R_table_ageadjust_util_female = data.frame( + Comp...Prob = c(NA, "Comp. 1", NA, NA, "Comp. 2", NA, NA, "Comp. 3", NA, NA, "Probability", "Comp. 1", "Probability", "Comp. 2", NA, NA, NA), + Parameter.name = c(NA, "Age/10", "(Age/10)^2", "intercept", "Age/10", "(Age/10)^2", "intercept", "Age/10", "(Age/10)^2", "intercept", "Age/10", "intercept", "Age/10", "intercept", "ln(sigma 1)", "ln(sigma 2)", "ln(sigma 3)"), + Females.Coefficient = c(NA, -0.0774, 0.0064, 0.299, -0.0147, -3e-04, 0.8708, 0.2043, -0.0241, 1.1659, 0.4028, -4.4767, 0.1937, -1.3549, 0.1282, 0.0831, 0.523), + Comp..1 = c("Age/10", "0.00181392505720311", "-0.000142821693113575", "-0.0055479120716095", "-8.40757439459012E-005", "7.68974788428647E-006", "0.000190677927101904", "7.6302791281555E-005", "2.68779546956997E-006", "-0.00188544592603937", "0.000587152824726099", "-0.00481411649614175", "8.6460875110737E-005", "-0.00166767866848402", "-0.00156514893635572", "-0.000663410325683143", "-0.000972530455969163"), + X5 = c("(Age/10)^2", "-0.000142821693113575", "1.16455508725469E-005", "0.000418371324371852", "7.82095798774855E-006", "-7.12226266273064E-007", "-1.80062162214054E-005", "-2.26878716411079E-005", "1.48733633962288E-006", "0.000186201780049869", "-3.19362611056718E-005", "0.000268200605768575", "-3.59817626829164E-006", "0.000107750120010565", "0.000100070484970427", "4.96783571225564E-005", "9.3754534593677E-005"), + X6 = c("intercept", "-0.0055479120716095", "0.000418371324371852", "0.017929999114245", "0.000204381130695266", "-1.88905888936587E-005", "-0.000448400461518393", "0.000726813592080134", "-0.000105482348999307", "0.00392086064903466", "-0.00250180036404139", "0.020339890843711", "-0.000422003990381206", "0.00652323274561575", "0.00591937681079903", "0.00229152315171725", "0.00243235134805328"), + Comp..2 = c("Age/10", "-8.40757439459012E-005", "7.82095798774855E-006", "0.000204381130695266", "0.000129215443937247", "-1.17793555804664E-005", "-0.000304065105412462", "-0.000475467875705631", "4.19123774206381E-005", "0.00230435810147678", "0.000142215345506761", "-0.000800368532530154", "1.28042740613655E-005", "0.000599852244867536", "-0.00016384837041777", "0.000332775202597476", "0.00137953646923537"), + X8 = c("(Age/10)^2", "7.68974788428647E-006", "-7.12226266273064E-007", "-1.88905888936587E-005", "-1.17793555804664E-005", "1.10760266422821E-006", "2.63878432142222E-005", "4.28812634826882E-005", "-3.85817932365746E-006", "-0.000207192172603296", "-1.38120152179962E-005", "7.61484851195201E-005", "-3.80182641717489E-007", "-6.40549916222688E-005", "1.42176634583524E-005", "-3.31596324456354E-005", "-0.000126872002764403"), + X9 = c("intercept", "0.000190677927101904", "-1.80062162214054E-005", "-0.000448400461518393", "-0.000304065105412462", "2.63878432142222E-005", "0.000781154174073528", "0.00114152107318473", "-9.72997109845831E-005", "-0.00550983733742592", "-0.000291616289476088", "0.00170190898250888", "-6.85206409897883E-005", "-0.000888429870817027", "0.000408287149410796", "-0.00061462673994418", "-0.00312609418508427"), + Comp..3 = c("Age/10", "7.6302791281555E-005", "-2.26878716411079E-005", "0.000726813592080134", "-0.000475467875705631", "4.28812634826882E-005", "0.00114152107318473", "0.00806160262858", "-0.000764694318342423", "-0.0208673418054048", "-0.00158080633547213", "0.0122473970122137", "-4.41025130039184E-006", "-8.71455602468233E-005", "0.00258293728957508", "0.000116466474103241", "-0.00505463894217601"), + X11 = c("(Age/10)^2", "2.68779546956997E-006", "1.48733633962288E-006", "-0.000105482348999307", "4.19123774206381E-005", "-3.85817932365746E-006", "-9.72997109845831E-005", "-0.000764694318342423", "7.5493603505479E-005", "0.00181159033077672", "0.000185660167714886", "-0.00136422743399523", "2.55924927356128E-005", "-0.000158988348362669", "-0.000256232307785368", "-2.4374052739607E-005", "0.000390936271290489"), + X12 = c("intercept", "-0.00188544592603937", "0.000186201780049869", "0.00392086064903466", "0.00230435810147678", "-0.000207192172603296", "-0.00550983733742592", "-0.0208673418054048", "0.00181159033077672", "0.0923336511771454", "0.00552376527983363", "-0.0361265561727065", "-0.000674465305138563", "0.023043690461186", "-0.00789090909426419", "0.00812722271087936", "0.0508836994965293"), + Probability = c("Age/10", "0.000587152824726099", "-3.19362611056718E-005", "-0.00250180036404139", "0.000142215345506761", "-1.38120152179962E-005", "-0.000291616289476088", "-0.00158080633547213", "0.000185660167714886", "0.00552376527983363", "0.00374720476972074", "-0.0235409197342492", "0.000813711359859942", "-0.00315976776505383", "-0.00278988754751826", "0.000371359871490182", "0.0049723966315152"), + Comp..1.1 = c("intercept", "-0.00481411649614175", "0.000268200605768575", "0.020339890843711", "-0.000800368532530154", "7.61484851195201E-005", "0.00170190898250888", "0.0122473970122137", "-0.00136422743399523", "-0.0361265561727065", "-0.0235409197342492", "0.160964976476367", "-0.00456257092300774", "0.0235248148927389", "0.0206457263706382", "0.000765815120668832", "-0.0295522138203499"), + Probability.1 = c("Age/10", "8.6460875110737E-005", "-3.59817626829164E-006", "-0.000422003990381206", "1.28042740613655E-005", "-3.80182641717489E-007", "-6.85206409897883E-005", "-4.41025130039184E-006", "2.55924927356128E-005", "-0.000674465305138563", "0.000813711359859942", "-0.00456257092300774", "0.00119093744451877", "-0.00644165502249595", "-0.000590416579476868", "-8.86112282562339E-005", "0.000251477628529736"), + Comp..2.1 = c("intercept", "-0.00166767866848402", "0.000107750120010565", "0.00652323274561575", "0.000599852244867536", "-6.40549916222688E-005", "-0.000888429870817027", "-8.71455602468233E-005", "-0.000158988348362669", "0.023043690461186", "-0.00315976776505383", "0.0235248148927389", "-0.00644165502249595", "0.0504179889713566", "0.00320982232761227", "0.00753199473767298", "0.0180944191956392"), + ln.sigma. = c("ln(sigma 1)", "-0.00156514893635572", "0.000100070484970427", "0.00591937681079903", "-0.00016384837041777", "1.42176634583524E-005", "0.000408287149410796", "0.00258293728957508", "-0.000256232307785368", "-0.00789090909426419", "-0.00278988754751826", "0.0206457263706382", "-0.000590416579476868", "0.00320982232761227", "0.00705433604345044", "0.000337621060753791", "-0.00545270993573478"), + X18 = c("ln(sigma 2)", "-0.000663410325683143", "4.96783571225564E-005", "0.00229152315171725", "0.000332775202597476", "-3.31596324456354E-005", "-0.00061462673994418", "0.000116466474103241", "-2.4374052739607E-005", "0.00812722271087936", "0.000371359871490182", "0.000765815120668832", "-8.86112282562339E-005", "0.00753199473767298", "0.000337621060753791", "0.00440694744795823", "0.00821659420933332"), + X19 = c("ln(sigma 3)", "-0.000972530455969163", "9.3754534593677E-005", "0.00243235134805328", "0.00137953646923537", "-0.000126872002764403", "-0.00312609418508427", "-0.00505463894217601", "0.000390936271290489", "0.0508836994965293", "0.0049723966315152", "-0.0295522138203499", "0.000251477628529736", "0.0180944191956392", "-0.00545270993573478", "0.00821659420933332", "0.0460540905247068") + ), + R_table_ageadjust_util_male = data.frame( + Comp...Prob = c(NA, "Comp. 1", NA, NA, "Comp. 2", NA, NA, "Comp. 3", NA, NA, "Probability", "Comp. 1", "Probability", "Comp. 2", NA, NA, NA), + Parameter.name = c(NA, "Age/10", "(Age/10)^2", "intercept", "Age/10", "(Age/10)^2", "intercept", "Age/10", "(Age/10)^2", "intercept", "Age/10", "intercept", "Age/10", "intercept", "ln(sigma 1)", "ln(sigma 2)", "ln(sigma 3)"), + Males.Coefficient = c(NA, -0.1609, 0.0129, 0.566, -0.008, -7e-04, 0.8502, -0.0316, -0.0051, 1.8194, 0.2804, -4.3366, 0.1445, -1.5277, 0.114, 0.0694, 0.5402), + Comp..1 = c("Age/10", "0.00180805718467376", "-0.000155394881440412", "-0.00469913686058501", "-2.04952315731803E-005", "1.76923239057346E-006", "4.13832367350342E-005", "0.000724985445230411", "-4.29945708400744E-005", "-0.00419163322868301", "0.000243660127142515", "0.000556416440293352", "8.68801453301919E-006", "-0.000753361254277279", "0.00140970939608021", "-0.00030592183178308", "-0.00246332628313912"), + X5 = c("(Age/10)^2", "-0.000155394881440412", "1.37293340332065E-005", "0.000389989054876887", "1.15986392145827E-006", "-9.6203795448618E-008", "-2.28470786199977E-006", "-5.648506654603E-005", "3.28653129116847E-006", "0.000331297591343349", "-1.73125504820459E-005", "-6.93863510738457E-005", "3.58862037847825E-007", "4.95701869200483E-005", "-0.000109852868343991", "1.86262054382121E-005", "0.000196433717937509"), + X6 = c("intercept", "-0.00469913686058501", "0.000389989054876887", "0.0129220764505025", "0.00011009363782216", "-1.04025906841095E-005", "-0.00021221368996537", "-0.00235626079994355", "0.000145555525358209", "0.0139005433706322", "-0.000666785188679418", "-0.000974292019637207", "5.06833797379128E-005", "0.00285419281192027", "-0.00431979774938225", "0.00177163662118351", "0.0082152338918358"), + Comp..2 = c("Age/10", "-2.04952315731803E-005", "1.15986392145827E-006", "0.00011009363782216", "0.000153983966039955", "-1.36488145264166E-005", "-0.000384174295666304", "-0.000678878300109977", "5.38490308619695E-005", "0.00303714645783587", "9.2890859246708E-005", "-0.000416866418886413", "0.000135744870622466", "0.000170340333384842", "-0.000257581188220602", "0.000710963391255698", "0.00124117311711967"), + X8 = c("(Age/10)^2", "1.76923239057346E-006", "-9.6203795448618E-008", "-1.04025906841095E-005", "-1.36488145264166E-005", "1.24910273084314E-006", "3.26431593531585E-005", "6.44394089603446E-005", "-5.16220406451199E-006", "-0.000293809272506211", "-9.37628800183464E-006", "3.99645405636231E-005", "-1.1841706886419E-005", "-3.17130333909983E-005", "2.3729404294288E-005", "-7.5703498485862E-005", "-0.000124608151214571"), + X9 = c("intercept", "4.13832367350342E-005", "-2.28470786199977E-006", "-0.00021221368996537", "-0.000384174295666304", "3.26431593531585E-005", "0.00101958664039823", "0.00152197651396501", "-0.000119747171492014", "-0.00654045239171073", "-0.000201913638339085", "0.000919351639672103", "-0.000354448771901043", "0.000311668000907189", "0.000558020538151863", "-0.00125776941422995", "-0.00245283652375875"), + Comp..3 = c("Age/10", "0.000724985445230411", "-5.648506654603E-005", "-0.00235626079994355", "-0.000678878300109977", "6.44394089603446E-005", "0.00152197651396501", "0.0113858914551752", "-0.000931927238570606", "-0.0457277118567312", "-0.000470852960992384", "0.0045059996437656", "-0.00038789136821903", "-0.00860299633420163", "0.00460629229305843", "-0.00768312816822898", "-0.0173519590055757"), + X11 = c("(Age/10)^2", "-4.29945708400744E-005", "3.28653129116847E-006", "0.000145555525358209", "5.38490308619695E-005", "-5.16220406451199E-006", "-0.000119747171492014", "-0.000931927238570606", "7.97305909426276E-005", "0.00348314479613714", "6.68265629615078E-005", "-0.000482915286976749", "4.44177723214262E-005", "0.000549945323083734", "-0.000321384872390157", "0.000573328600141507", "0.00123904583267989"), + X12 = c("intercept", "-0.00419163322868301", "0.000331297591343349", "0.0139005433706322", "0.00303714645783587", "-0.000293809272506211", "-0.00654045239171073", "-0.0457277118567312", "0.00348314479613714", "0.220920263819935", "0.00262782787645511", "-0.0229728839249657", "0.00227885855505096", "0.0516945030490172", "-0.02466047496201", "0.0451087733515744", "0.102106928289112"), + Probability = c("Age/10", "0.000243660127142515", "-1.73125504820459E-005", "-0.000666785188679418", "9.2890859246708E-005", "-9.37628800183464E-006", "-0.000201913638339085", "-0.000470852960992384", "6.68265629615078E-005", "0.00262782787645511", "0.00375620898511979", "-0.0205640680849767", "0.00098761301789045", "-0.00319365084944945", "7.82811452991396E-007", "0.00129025173814629", "0.00243439635247922"), + Comp..1.1 = c("intercept", "0.000556416440293352", "-6.93863510738457E-005", "-0.000974292019637207", "-0.000416866418886413", "3.99645405636231E-005", "0.000919351639672103", "0.0045059996437656", "-0.000482915286976749", "-0.0229728839249657", "-0.0205640680849767", "0.135277707719853", "-0.00520169978644947", "0.0216700313230661", "0.00736586742092584", "-0.00292680780215841", "-0.0205436877294718"), + Probability.1 = c("Age/10", "8.68801453301919E-006", "3.58862037847825E-007", "5.06833797379128E-005", "0.000135744870622466", "-1.1841706886419E-005", "-0.000354448771901043", "-0.00038789136821903", "4.44177723214262E-005", "0.00227885855505096", "0.00098761301789045", "-0.00520169978644947", "0.00128577080041762", "-0.00566979482005049", "-0.000345039831748517", "0.000810459882349813", "0.00183133956149149"), + Comp..2.1 = c("intercept", "-0.000753361254277279", "4.95701869200483E-005", "0.00285419281192027", "0.000170340333384842", "-3.17130333909983E-005", "0.000311668000907189", "-0.00860299633420163", "0.000549945323083734", "0.0516945030490172", "-0.00319365084944945", "0.0216700313230661", "-0.00566979482005049", "0.0543623781150663", "-0.00309975120152338", "0.0166892820314486", "0.0244096288469085"), + ln.sigma. = c("ln(sigma 1)", "0.00140970939608021", "-0.000109852868343991", "-0.00431979774938225", "-0.000257581188220602", "2.3729404294288E-005", "0.000558020538151863", "0.00460629229305843", "-0.000321384872390157", "-0.02466047496201", "7.82811452991396E-007", "0.00736586742092584", "-0.000345039831748517", "-0.00309975120152338", "0.0132715906497832", "-0.0029432939805352", "-0.0145268213896526"), + X18 = c("ln(sigma 2)", "-0.00030592183178308", "1.86262054382121E-005", "0.00177163662118351", "0.000710963391255698", "-7.5703498485862E-005", "-0.00125776941422995", "-0.00768312816822898", "0.000573328600141507", "0.0451087733515744", "0.00129025173814629", "-0.00292680780215841", "0.000810459882349813", "0.0166892820314486", "-0.0029432939805352", "0.0172254773943738", "0.0235021382458683"), + X19 = c("ln(sigma 3)", "-0.00246332628313912", "0.000196433717937509", "0.0082152338918358", "0.00124117311711967", "-0.000124608151214571", "-0.00245283652375875", "-0.0173519590055757", "0.00123904583267989", "0.102106928289112", "0.00243439635247922", "-0.0205436877294718", "0.00183133956149149", "0.0244096288469085", "-0.0145268213896526", "0.0235021382458683", "0.0584012933799599") + ) +) + +testthat::test_that("it calculates mean utilities correctly", { + + covariates <- data.frame( + age10 = as.numeric(16:101) / 10, + age10sq = as.numeric(16:101) ^ 2 / 100, + `(Intercept)` = 1.0 + ) + + l_coefficients <- list( + c(age10 = -0.0774, age10sq = 0.0064, `(Intercept)` = 0.2990), + c(age10 = -0.0147, age10sq = -0.0003, `(Intercept)` = 0.8708), + c(age10 = 0.2043, age10sq = -0.0241, `(Intercept)` = 1.1659) + ) + + sigma <- c(0.1282, 0.0831, 0.523) + + l_mix_coef <- list( + c(age10 = 0.4028, age10sq = 0.0, `(Intercept)` = -4.4767), + c(age10 = 0.1937, age10sq = 0.0, `(Intercept)` = -1.3549) + ) + + u_mean_female <- utility_mixture(covariates, l_coefficients, sigma, l_mix_coef, type = "mean") + u_mean_female <- unname(u_mean_female) + + target_u_female <- c( + 0.9117,0.9122,0.9126,0.9127,0.9127,0.9126,0.9122,0.9118,0.9112,0.9105, + 0.9096,0.9087,0.9076,0.9064,0.9051,0.9037,0.9022,0.9006,0.8989,0.8971, + 0.8952,0.8932,0.8912,0.8890,0.8868,0.8844,0.8820,0.8795,0.8769,0.8743, + 0.8715,0.8687,0.8658,0.8628,0.8598,0.8566,0.8534,0.8501,0.8467,0.8432, + 0.8397,0.8361,0.8324,0.8286,0.8247,0.8207,0.8167,0.8126,0.8083,0.8040, + 0.7996,0.7952,0.7906,0.7859,0.7812,0.7763,0.7714,0.7663,0.7612,0.7559, + 0.7506,0.7452,0.7396,0.7340,0.7282,0.7224,0.7164,0.7103,0.7042,0.6979, + 0.6915,0.6850,0.6784,0.6717,0.6649,0.6580,0.6510,0.6439,0.6367,0.6295, + 0.6222,0.6147,0.6073,0.5998,0.5922,0.5846 + ) + + expect_equal(u_mean_female, target_u_female, tolerance = 1e-4) + +}) + + +testthat::test_that("it extracts from `i` and calculates across ages", { + + target_u_female <- c( + 0.9117,0.9122,0.9126,0.9127,0.9127,0.9126,0.9122,0.9118,0.9112,0.9105, + 0.9096,0.9087,0.9076,0.9064,0.9051,0.9037,0.9022,0.9006,0.8989,0.8971, + 0.8952,0.8932,0.8912,0.8890,0.8868,0.8844,0.8820,0.8795,0.8769,0.8743, + 0.8715,0.8687,0.8658,0.8628,0.8598,0.8566,0.8534,0.8501,0.8467,0.8432, + 0.8397,0.8361,0.8324,0.8286,0.8247,0.8207,0.8167,0.8126,0.8083,0.8040, + 0.7996,0.7952,0.7906,0.7859,0.7812,0.7763,0.7714,0.7663,0.7612,0.7559, + 0.7506,0.7452,0.7396,0.7340,0.7282,0.7224,0.7164,0.7103,0.7042,0.6979, + 0.6915,0.6850,0.6784,0.6717,0.6649,0.6580,0.6510,0.6439,0.6367,0.6295, + 0.6222,0.6147,0.6073,0.5998,0.5922,0.5846 + ) + + target_u_male <- c( + 0.9446,0.9433,0.9420,0.9406,0.9392,0.9378,0.9363,0.9348,0.9332,0.9317, + 0.9300,0.9284,0.9267,0.9250,0.9232,0.9214,0.9196,0.9177,0.9158,0.9138, + 0.9118,0.9098,0.9077,0.9056,0.9034,0.9012,0.8989,0.8966,0.8943,0.8919, + 0.8895,0.8870,0.8845,0.8819,0.8793,0.8767,0.8740,0.8712,0.8684,0.8656, + 0.8627,0.8598,0.8568,0.8537,0.8507,0.8475,0.8444,0.8412,0.8379,0.8346, + 0.8312,0.8278,0.8244,0.8209,0.8174,0.8138,0.8102,0.8065,0.8028,0.7990, + 0.7952,0.7914,0.7875,0.7836,0.7796,0.7756,0.7716,0.7676,0.7635,0.7593, + 0.7552,0.7510,0.7468,0.7425,0.7383,0.7340,0.7297,0.7254,0.7211,0.7167, + 0.7124,0.7080,0.7036,0.6993,0.6949,0.6906 + ) + + p <- add_population_utility_params(list(), .i = .i) + + expect_equal(utility_genpop(16:101, "female", .p = p), target_u_female, tolerance = 1e-4) + expect_equal(utility_genpop(16:101, "male", .p = p), target_u_male, tolerance = 1e-4) + +}) + +testthat::test_that("the wrapper function works for aggregate data", { + + p_male <- 0.6 + mean_age <- 68.0 + + # Create a trace of condition-specific utility values + cycle <- 1:104 + u_condition <- rep(c(0.8, 0.6), c(52, 52)) + + # Get the output + .p <- list(basic = list(cl_y = 1/52)) + .p <- add_population_utility_params(.p, .i = .i) + output <- adjust_utility(age = mean_age, sex = p_male, utilities = data.frame(cycle = cycle, u = u_condition), .p = .p) + + # Target output calculated using Excel + target <- c(0.8,0.799925437,0.799850851,0.799776241,0.799701608,0.799626952,0.799552273,0.79947757,0.799402844,0.799328094,0.799253321,0.799178525,0.799103706,0.799028863,0.798953996,0.798879107,0.798804194,0.798729258,0.798654298,0.798579315,0.798504309,0.79842928,0.798354227,0.79827915,0.798204051,0.798128928,0.798053781,0.797978612,0.797903418,0.797828202,0.797752962,0.797677699,0.797602413,0.797527103,0.797451769,0.797376413,0.797301033,0.797225629,0.797150202,0.797074752,0.796999279,0.796923782,0.796848262,0.796772718,0.796697151,0.79662156,0.796545946,0.796470309,0.796394649,0.796318964,0.796243257,0.796167526,0.597068829,0.597011996,0.596955145,0.596898277,0.596841391,0.596784487,0.596727566,0.596670628,0.596613672,0.596556698,0.596499707,0.596442698,0.596385672,0.596328628,0.596271566,0.596214487,0.59615739,0.596100276,0.596043145,0.595985995,0.595928828,0.595871644,0.595814442,0.595757222,0.595699985,0.59564273,0.595585458,0.595528168,0.59547086,0.595413535,0.595356192,0.595298832,0.595241454,0.595184059,0.595126646,0.595069215,0.595011767,0.594954301,0.594896817,0.594839316,0.594781798,0.594724262,0.594666708,0.594609136,0.594551547,0.594493941,0.594436316,0.594378675,0.594321015,0.594263338,0.594205644,0.594147931) + + testthat::expect_equal(output, target, tolerance = 1e-6) +}) + +testthat::test_that("the wrapper function works for patient-level data", { + + sex <- rep(c("female", "male"), c(3, 2)) + age <- c(63.4, 87.6, 73.5, 66.0, 81.2) + + # Create a trace of condition-specific utility values + cycle <- 1:104 + u_condition <- rep(c(0.8, 0.6), c(52, 52)) + + # Get the output + .p <- list(basic = list(cl_y = 1/52)) + .p <- add_population_utility_params(.p, .i = .i) + output <- adjust_utility(age = age, sex = sex, utilities = data.frame(cycle = cycle, u = u_condition), .p = .p) + + # Target output calculated using Excel + target <- c(0.8,0.799906957,0.799813886,0.799720787,0.799627659,0.799534503,0.799441318,0.799348105,0.799254864,0.799161594,0.799068296,0.79897497,0.798881615,0.798788232,0.798694821,0.798601381,0.798507913,0.798414417,0.798320892,0.798227339,0.798133758,0.798040148,0.79794651,0.797852843,0.797759149,0.797665426,0.797571674,0.797477894,0.797384086,0.79729025,0.797196385,0.797102492,0.797008571,0.796914621,0.796820643,0.796726637,0.796632602,0.796538539,0.796444448,0.796350329,0.796256181,0.796162005,0.7960678,0.795973568,0.795879307,0.795785017,0.7956907,0.795596354,0.79550198,0.795407577,0.795313146,0.795218687,0.59634315,0.596272263,0.596201355,0.596130426,0.596059476,0.595988504,0.595917512,0.595846498,0.595775463,0.595704406,0.595633329,0.59556223,0.59549111,0.595419969,0.595348807,0.595277623,0.595206419,0.595135193,0.595063946,0.594992678,0.594921388,0.594850078,0.594778746,0.594707393,0.594636019,0.594564624,0.594493208,0.59442177,0.594350311,0.594278831,0.59420733,0.594135808,0.594064265,0.5939927,0.593921115,0.593849508,0.59377788,0.593706231,0.593634561,0.59356287,0.593491157,0.593419423,0.593347669,0.593275893,0.593204096,0.593132278,0.593060438,0.592988578,0.592916697,0.592844794,0.59277287,0.592700925) + + testthat::expect_equal(output, target, tolerance = 1e-6) +}) + +testthat::test_that("it adds parameters to `p`", { + p <- list(basic = list(cl_y = 1/52)) + p <- add_population_utility_params(p, .i = .i) + + expected_female <- list( + l_coefficients = list( + c(`Age/10` = -0.0774, `(Age/10)^2` = 0.0064, intercept = 0.2990), + c(`Age/10` = -0.0147, `(Age/10)^2` = -0.0003, intercept = 0.8708), + c(`Age/10` = 0.2043, `(Age/10)^2` = -0.0241, intercept = 1.1659) + ), + l_mix_coef = list( + c(`Age/10` = 0.4028, intercept = -4.4767), + c(`Age/10` = 0.1937, intercept = -1.3549) + ), + v_sigma = c(0.1282, 0.0831, 0.523) + ) + + expected_male <- list( + l_coefficients = list( + c(`Age/10` = -0.1609, `(Age/10)^2` = 0.0129, intercept = 0.5660), + c(`Age/10` = -0.0080, `(Age/10)^2` = -0.0007, intercept = 0.8502), + c(`Age/10` = -0.0316, `(Age/10)^2` = -0.0051, intercept = 1.8194) + ), + l_mix_coef = list( + c(`Age/10` = 0.2804, intercept = -4.3366), + c(`Age/10` = 0.1445, intercept = -1.5277) + ), + v_sigma = c(0.1140, 0.0694, 0.5402) + ) + + testthat::expect_equal(p$util$pop_norms$female, expected_female) + testthat::expect_equal(p$util$pop_norms$male, expected_male) +}) diff --git a/tests/testthat/test-markov.R b/tests/testthat/test-markov.R new file mode 100644 index 0000000..5787290 --- /dev/null +++ b/tests/testthat/test-markov.R @@ -0,0 +1,332 @@ +library(here) +source(here::here("3_Functions", "markov", "markov.R")) + +testthat::test_that("f_markov_M_prep works as expected", { + + N_cycle <- 10 + N_state <- 2 + + nr <- N_cycle + nc <- 1 + 3 * (N_state) + 1 + + tp <- matrix(rbeta(nr * nc, shape1 = 1, shape2 = 9), nrow = nr, ncol = nc) + tp[,1] <- 1:N_cycle + + testthat::expect_silent( res <- f_markov_M_prep(tp, N_state) ) + + testthat::expect_equal( + lapply(res, names), + list( + L1 = c("disc", "next", "death", "stay"), + L2 = c("disc", "next", "death", "stay"), + NT = c("death", "stay") + ) + ) + + testthat::expect_equal(res$L1$disc, tp[,2]) + testthat::expect_equal(res$L2$`next`, tp[,6]) + testthat::expect_equal(res$NT$death, tp[,8]) +}) + +testthat::test_that("f_markov_calcN works as expected", { + testthat::expect_equal(f_markov_calcN(4, 20), 143) +}) + +testthat::test_that("f_markov_topleftFinder works as expected", { + + # `nt` is the number of non-tunnel rows, i.e., the number of rows at the top + # of the transition matrix for health states that do not have tunnel states + nt <- 2 + + # `tun_n` is which of the tunnel-based health states the coordinates are being + # requested for + tun_n <- 1:4 + + # `TH` is the number of cycles (time horizon) + TH <- 5 + + expected <- list( + + # 2nd line + matrix( + c( + 3, 4, # On-treatment -> On-treatment (stay) + 3, 9, # On-treatment -> Off-treatment (disc) + 3, 13, # On-treatment -> Next line (next) + 8, 9, # Off-treatment -> Off-treatment (stay) + 8, 13 # Off-treatment -> Next line (next) + ), + ncol = 2, + byrow = TRUE, + dimnames = list( + c("L2_stay_on", "L2_disc_on", "L2_next_on", "L2_stay_off", "L2_next_off"), + c("row", "col") + ) + ), + + # 3rd line + matrix( + c( + 13, 14, # On-treatment -> On-treatment (stay) + 13, 19, # On-treatment -> Off-treatment (disc) + 13, 23, # On-treatment -> Next line (next) + 18, 19, # Off-treatment -> Off-treatment (stay) + 18, 23 # Off-treatment -> Next line (next) + ), + ncol = 2, + byrow = TRUE, + dimnames = list( + c("L3_stay_on", "L3_disc_on", "L3_next_on", "L3_stay_off", "L3_next_off"), + c("row", "col") + ) + ), + + # 4th line + matrix( + c( + 23, 24, # On-treatment -> On-treatment (stay) + 23, 29, # On-treatment -> Off-treatment (disc) + 23, 33, # On-treatment -> Next line (next) + 28, 29, # Off-treatment -> Off-treatment (stay) + 28, 33 # Off-treatment -> Next line (next) + ), + ncol = 2, + byrow = TRUE, + dimnames = list( + c("L4_stay_on", "L4_disc_on", "L4_next_on", "L4_stay_off", "L4_next_off"), + c("row", "col") + ) + ), + + # 5th line + matrix( + c( + 33, 34, # On-treatment -> On-treatment (stay) + 33, 39, # On-treatment -> Off-treatment (disc) + 33, 43, # On-treatment -> Next line (next) + 38, 39, # Off-treatment -> Off-treatment (stay) + 38, 43 # Off-treatment -> Next line (next) + ), + ncol = 2, + byrow = TRUE, + dimnames = list( + c("L5_stay_on", "L5_disc_on", "L5_next_on", "L5_stay_off", "L5_next_off"), + c("row", "col") + ) + ) + ) + + testthat::expect_equal( + expected, + lapply(tun_n, f_markov_topleftFinder, TH = TH, nt = nt) + ) + +}) + +testthat::test_that("f_markov_M_compiler works as expected", { + tp <- list( + L1 = list( + disc = c(0.20, 0.22, 0.24), + `next` = c(0.20, 0.25, 0.30), + death = c(0.05, 0.06, 0.07), + stay = c(0.55, 0.47, 0.39) + ), + L2 = list( + disc = c(0.06, 0.02, 0.12), + `next` = c(0.05, 0.08, 0.09), + death = c(0.08, 0.09, 0.10), + stay = c(0.81, 0.81, 0.69) + ), + L3 = list( + disc = c(0.05, 0.08, 0.06), + `next` = c(0.03, 0.04, 0.05), + death = c(0.12, 0.13, 0.14), + stay = c(0.80, 0.75, 0.75) + ), + NT = list( + death = c(0.15, 0.16, 0.17), + stay = c(0.85, 0.84, 0.83) + ) + ) + + # `n_lines` is the total number of lines of treatment, excluding BSC + n_lines <- 3 + + # `TH` is the time horizon (number of cycles) + TH <- 3 + + # `N` is the dimensionality of the transition matrix + N <- 18 + + # `nt` is the number of non-tunnel rows + nt <- 2 + + calculated <- f_markov_M_compiler(tp, n_lines, TH, N, nt) + + expected <- matrix(0, nrow = N, ncol = N) + + expected[1,1] <- tp$L1$stay[1] + expected[1,2] <- tp$L1$disc[1] + expected[1,3] <- tp$L1$`next`[1] + expected[1,N] <- tp$L1$death[1] + + expected[2,2] <- tp$L1$disc[1] + tp$L1$stay[1] + expected[2,3] <- tp$L1$`next`[1] + expected[2,N] <- tp$L1$death[1] + + diag(expected[3:4,4:5]) <- tp$L2$stay[1:2] + diag(expected[3:4,7:8]) <- tp$L2$disc[1:2] + expected[3:5,9] <- tp$L2$`next` + expected[3:5,N] <- tp$L2$death + + diag(expected[6:7,7:8]) <- tp$L2$disc[1:2] + tp$L2$stay[1:2] + expected[6:8,9] <- tp$L2$`next` + expected[6:8,N] <- tp$L2$death + + diag(expected[9:10,10:11]) <- tp$L3$stay[1:2] + diag(expected[9:10,13:14]) <- tp$L3$disc[1:2] + expected[9:11,15] <- tp$L3$`next` + expected[9:11,N] <- tp$L3$death + + diag(expected[12:13,13:14]) <- tp$L3$disc[1:2] + tp$L3$stay[1:2] + expected[12:14,15] <- tp$L3$`next` + expected[12:14,N] <- tp$L3$death + + diag(expected[15:16,16:17]) <- tp$NT$stay[1:2] + expected[15:17,N] <- tp$NT$death + + expected[N-1,N-1] <- 1 - expected[N-1,N] + expected[N,N] <- 1 + + testthat::expect_equal(as.matrix(calculated), expected) + +}) + +testthat::test_that("f_markov_sequenceExtrapolator works as expected", { + TH <- 5 + L1_tp <- list( + seq(0.90, 0.82, length.out = 5), # On 1st line treatment - stay + seq(0.07, 0.11, length.out = 5), # On 1st line treatment - discontinue + seq(0.02, 0.06, length.out = 5), # On 1st line treatment - jump to 2nd line treatment + rep(0.01, 5) # On 1st line treatment - die + ) + names(L1_tp) <- c("stay", "disc", "next", "death") + N <- 18 + + M <- Matrix::Matrix(data = 0, nrow = 18, ncol = 18, sparse = TRUE) + M[1,1:3] <- c(0.9, 0.07, 0.02) + M[2,2:3] <- c(0.97, 0.02) + M[,N] <- rep(c(0.01, 0.02, 0.1, 1), c(2, 10, 5, 1)) + diag(M[3:6,4:7]) <- 0.9 + M[7,7] <- 0.9 + diag(M[3:6,9:12]) <- 0.05 + M[7,12] <- 0.05 + M[3:12,13] <- 0.03 + diag(M[8:11,9:12]) <- 0.95 + M[12,12] <- 0.95 + diag(M[13:16,14:17]) <- 0.9 + M[17,17] <- 0.9 + + res <- f_markov_sequenceExtrapolator(L1_tp, TH, M, N) + + expected <- Matrix::Matrix(data = c( + scan( + text = " + 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9 0.07 0.02 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.01 + 0.792 0.1392 0.0291 0.018 0 0 0 0 0.001 0 0 0 0.0006 0 0 0 0 0.0201 + 0.68112 0.20352 0.037248 0.02619 0.0162 0 0 0 0.001455 0.00185 0 0 0.001443 0.00054 0 0 0 0.030434 + 0.5721408 0.2594208 0.044232 0.0335232 0.023571 0.01458 0 0 0.0018624 0.00269175 0.0025675 0 0.00248829 0.0012987 0.000486 0 0 0.04113756 + 0.469155456 0.304196832 0.049893696 0.0398088 0.03017088 0.0212139 0.013122 0 0.0022116 0.00344544 0.003735713 0.003168125 0.003690836 0.002239461 0.00116883 0.0004374 0 0.052341032 + ", + quiet = TRUE + ) + ), nrow = TH + 1, ncol = N, byrow = TRUE) + + # At the moment we are only not including the final row of `expected` but we + # might want to change `f_markov_sequenceExtrapolator` so that we do want to + # include it, in which case, just use `expected` in this test instead of + # `expected[1:TH,]` + testthat::expect_equal(res, expected[1:TH,]) + +}) + +testthat::test_that("f_markov_traceConsolidator works as expected", { + + full_trace <- new( + "dgCMatrix", + i = c(0L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 2L, 3L, 4L, 3L, + 4L, 4L, 2L, 3L, 4L, 3L, 4L, 4L, 2L, 3L, 4L, 3L, 4L, 4L, 1L, 2L, 3L, 4L), + p = c(0L, 5L, 9L, 13L, 16L, 18L, 19L, 19L, 19L, 22L, 24L, 25L, 25L, 28L, + 30L, 31L, 31L, 31L, 35L), + Dim = c(5L, 18L), + Dimnames = list(NULL, NULL), + x = c(1, 0.9, 0.792, 0.68112, 0.5721408, 0.07, 0.1392, 0.20352, 0.2594208, + 0.02, 0.0291, 0.037248, 0.044232, 0.018, 0.02619, 0.0335232, 0.0162, + 0.023571, 0.01458, 0.001, 0.001455, 0.0018624, 0.00185, 0.00269175, + 0.0025675, 6e-04, 0.001443, 0.00248829, 0.00054, 0.0012987, 0.000486, + 0.01, 0.0201, 0.030434, 0.04113756), + factors = list() + ) + + split_list = list( + L1_on = list(any_split = FALSE), + L1_off = list(any_split = FALSE), + L2_on = list(any_split = TRUE, t = c(2, 4)), + L2_off = list(any_split = FALSE), + BSC = list(any_split = TRUE, t = 3) + ) + + TH <- 5 + n_lines <- 2 + + res <- f_markov_traceConsolidator(full_trace, split_list, TH, n_lines) + + expected_os <- 1 - c(0, 0.01, 0.0201, 0.030434, 0.04113756, 0.052341032) + expected_L1 <- c(1, 0.97, 0.9312, 0.88464, 0.8315616, 0.773352288) + expected_full_lines <- array( + data = scan(quiet = TRUE, text = " + 1 0.9 0.792 0.68112 0.5721408 0.469155456 + 0 0.07 0.1392 0.20352 0.2594208 0.304196832 + 0 0.02 0.0471 0.079638 0.1159062 0.154209276 + 0 0 0.001 0.003305 0.00712165 0.012560878 + 0 0 0.0006 0.001983 0.00427299 0.007536527 + 0 0.01 0.0201 0.030434 0.04113756 0.052341032 + "), + dim = c(6, 6), + dimnames = list(NULL, c(L1_on = "L1_on", L1_off = "L1_off", L2_on = "L2_on", L2_off = "L2_off", BSC = "BSC", dead = "dead")) + ) + expected_split_pop <- list( + L2_on = array( + data = scan(quiet = TRUE, text = " + 0 0.02 0.0291 0.037248 0.044232 0.049893696 + 0 0 0.018 0.04239 0.0716742 0.10431558 + 0 0.02 0.0471 0.079638 0.1013262 0.119873376 + 0 0 0 0 0.01458 0.0343359 + "), + dim = c(6, 4), + dimnames = list(NULL, c("L2_on_split2_before", "L2_on_split2_after", "L2_on_split4_before", "L2_on_split4_after")) + ), + L2_off = NULL, + BSC = array( + data = scan(quiet = TRUE, text = " + 0 0 0.0006 0.001983 0.00378699 0.005930297 + 0 0 0 0 0.000486 0.00160623 + "), + dim = c(6, 2), + dimnames = list(NULL, c("BSC_split3_before", "BSC_split3_after")) + ) + ) + + testthat::expect_equal(res$OS, expected_os[1:TH]) + testthat::expect_equal(res$L1, expected_L1[1:TH]) + + # When a data.frame is subset, dimnames[[1]] and dimnames[[2]] for the result + # are no longer named vectors if they previously were + testthat::expect_equal(res$full_lines[1:TH,], expected_full_lines[1:TH,]) + + testthat::expect_equal(res$split_pop$L2_on, expected_split_pop$L2_on[1:TH,]) + testthat::expect_equal(res$split_pop$L2_off, expected_split_pop$L2_off) + testthat::expect_equal(res$split_pop$L2_BSC, expected_split_pop$L2_BSC[1:TH,]) + +}) diff --git a/tests/testthat/test-other_cause_mortality.R b/tests/testthat/test-other_cause_mortality.R new file mode 100644 index 0000000..95d42b8 --- /dev/null +++ b/tests/testthat/test-other_cause_mortality.R @@ -0,0 +1,135 @@ +require(here) +source(here::here("3_Functions", "survival", "other_cause_mortality.R")) +source(here::here("3_Functions", "survival", "Survival_functions.R")) + +## Testing Darren's functions ---- + +testthat::test_that("f_surv_getgpopOSFast works", { + + R_table_mort_lifeTable <- data.table( + age..x. = 0:100, + qx..males. = c(0.004224,0.000229,0.000127,0.000102,8.6e-05,7.4e-05,8.5e-05,6.7e-05,6.9e-05,6e-05,7.8e-05,7.7e-05,0.000102,0.000116,0.000129,0.000172,0.000205,0.000311,0.000402,0.000454,0.000525,0.000507,0.000497,0.000524,0.000556,0.000601,0.000607,0.000629,0.000681,0.000728,0.000771,0.000835,0.000858,0.000957,0.000989,0.0011,0.001155,0.001351,0.001317,0.001457,0.001606,0.0017,0.001848,0.002015,0.00221,0.002467,0.002646,0.002743,0.00296,0.003297,0.003577,0.003821,0.004075,0.004402,0.00472,0.005046,0.005593,0.00606,0.006695,0.007239,0.007912,0.008636,0.009601,0.010552,0.011171,0.01246,0.013853,0.014782,0.016348,0.017949,0.019238,0.020795,0.022782,0.025799,0.028776,0.0323,0.03573,0.040136,0.045188,0.050259,0.056143,0.06203,0.069197,0.077486,0.087296,0.097476,0.110221,0.122808,0.137654,0.154513,0.163489,0.183325,0.200869,0.222981,0.244606,0.268951,0.290494,0.31405,0.335507,0.369942,0.390506), + qx..females. = c(0.003503,0.000214,0.000114,9.5e-05,6.4e-05,7.4e-05,7.1e-05,5.5e-05,5.8e-05,5.1e-05,6.6e-05,5.5e-05,5.7e-05,8.7e-05,9.6e-05,0.000113,0.000131,0.000158,0.000218,0.000212,0.000187,0.000211,0.000245,0.000215,0.000223,0.00026,0.000257,0.00031,0.000314,0.000338,0.000387,0.000394,0.000468,0.000493,0.000585,0.000592,0.000678,0.000761,0.000792,0.000868,0.000924,0.001004,0.001102,0.001239,0.001345,0.001484,0.001625,0.001744,0.001966,0.002051,0.002234,0.002452,0.002581,0.002764,0.002964,0.003283,0.003637,0.003928,0.004367,0.004707,0.005247,0.005636,0.006451,0.006818,0.007379,0.008113,0.00877,0.009554,0.010602,0.011458,0.012895,0.013637,0.015499,0.017289,0.019688,0.021766,0.024397,0.027918,0.03142,0.035713,0.039611,0.045127,0.050197,0.057155,0.065142,0.073748,0.08452,0.095918,0.107798,0.121609,0.136466,0.153438,0.171026,0.189568,0.20787,0.230227,0.253171,0.277939,0.299649,0.31991,0.350742) + ) + t_yr <- seq(0, 40, by = 1/12) + + start_age <- 64.4 + p_male <- 0.709 + target <- c(1,0.999157028,0.998314787,0.997473277,0.996632497,0.995792447,0.994953126,0.994114533,0.993276668,0.992345274,0.99141478,0.990485185,0.98955649,0.988628693,0.987701794,0.98677579,0.985850682,0.984926469,0.984003149,0.983080723,0.982159188,0.981141039,0.980123984,0.97910802,0.978093146,0.977079362,0.976066665,0.975055055,0.97404453,0.97303509,0.972026733,0.971019457,0.970013263,0.968935629,0.967859231,0.966784069,0.965710139,0.964637442,0.963565975,0.962495737,0.961426726,0.960358942,0.959292382,0.958227045,0.957162931,0.955985793,0.954810149,0.953635998,0.952463337,0.951292165,0.95012248,0.948954279,0.947787562,0.946622325,0.945458567,0.944296286,0.94313548,0.941866018,0.940598324,0.939332396,0.93806823,0.936805824,0.935545176,0.934286283,0.933029143,0.931773753,0.930520111,0.929268213,0.928018059,0.92666497,0.925313909,0.923964874,0.922617862,0.921272868,0.919929891,0.918588928,0.917249974,0.915913027,0.914578084,0.913245142,0.911914198,0.910483657,0.909055431,0.907629514,0.906205905,0.904784598,0.90336559,0.901948877,0.900534456,0.899122322,0.897712472,0.896304902,0.894899608,0.893348523,0.891800199,0.890254629,0.88871181,0.887171735,0.885634401,0.884099803,0.882567934,0.88103879,0.879512367,0.877988659,0.876467662,0.874751932,0.873039657,0.87133083,0.869625445,0.867923493,0.866224968,0.864529863,0.862838171,0.861149885,0.859464997,0.857783502,0.856105391,0.854226089,0.852351022,0.850480178,0.84861355,0.846751126,0.844892897,0.843038854,0.841188986,0.839343285,0.837501741,0.835664343,0.833831084,0.831782012,0.82973812,0.827699393,0.825665818,0.823637381,0.821614071,0.819595872,0.817582772,0.815574758,0.813571817,0.811573936,0.8095811,0.807372136,0.805169362,0.80297276,0.800782313,0.798598002,0.796419811,0.794247722,0.792081717,0.789921779,0.78776789,0.785620034,0.783478193,0.781063694,0.77865682,0.776257548,0.773865854,0.771481712,0.769105098,0.766735988,0.764374358,0.762020185,0.759673442,0.757334108,0.755002158,0.752379395,0.749765972,0.747161856,0.744567014,0.74198141,0.739405012,0.736837786,0.734279699,0.731730717,0.729190809,0.726659939,0.724138077,0.721321818,0.718516761,0.715722861,0.712940073,0.71016835,0.70740765,0.704657926,0.701919135,0.699191232,0.696474173,0.693767914,0.691072412,0.688070867,0.685082671,0.682107762,0.679146082,0.676197569,0.673262165,0.670339809,0.667430443,0.664534007,0.661650444,0.658779694,0.6559217,0.652744721,0.649583445,0.646437793,0.643307686,0.640193046,0.637093795,0.634009856,0.630941153,0.627887607,0.624849144,0.621825688,0.618817163,0.615469389,0.612140108,0.608829217,0.605536612,0.602262191,0.59900585,0.59576749,0.592547008,0.589344304,0.586159278,0.582991832,0.579841865,0.576304927,0.572789983,0.569296893,0.565825521,0.562375728,0.558947378,0.555540335,0.552154466,0.548789636,0.545445712,0.542122563,0.538820056,0.535093949,0.531394082,0.52772027,0.524072324,0.520450061,0.516853296,0.513281848,0.509735534,0.506214176,0.502717596,0.499245615,0.495798058,0.491942352,0.488117145,0.484322192,0.480557249,0.476822077,0.473116437,0.46944009,0.465792804,0.462174343,0.458584477,0.455022977,0.451489615,0.447486304,0.443519056,0.439587543,0.435691437,0.431830416,0.428004159,0.424212349,0.420454671,0.416730814,0.413040469,0.40938333,0.405759094,0.401710667,0.397703209,0.3937363,0.389809525,0.385922473,0.382074736,0.378265912,0.374495603,0.370763413,0.367068954,0.363411837,0.359791681,0.355742965,0.351740462,0.347783637,0.343871964,0.34000492,0.336181989,0.332402663,0.328666436,0.324972812,0.321321297,0.317711407,0.31414266,0.310143514,0.306196,0.302299444,0.29845318,0.29465655,0.290908905,0.287209605,0.283558019,0.279953524,0.276395503,0.272883351,0.269416469,0.265704268,0.262043649,0.258433889,0.254874277,0.251364109,0.247902695,0.244489351,0.241123405,0.237804193,0.234531061,0.231303365,0.228120468,0.224562224,0.221059951,0.217612762,0.214219785,0.21088016,0.207593041,0.204357597,0.201173008,0.198038468,0.194953186,0.191916381,0.188927284,0.185653275,0.182436409,0.179275683,0.17617011,0.173118722,0.170120566,0.167174707,0.164280228,0.161436225,0.158641813,0.155896122,0.153198295,0.150223338,0.147306588,0.144446898,0.141643145,0.138894225,0.136199059,0.133556589,0.130965776,0.128425603,0.125935074,0.123493211,0.121099057,0.118494674,0.115946742,0.113454028,0.111015327,0.108629459,0.106295272,0.104011637,0.101777451,0.099591636,0.097453137,0.095360921,0.09331398,0.091075724,0.088891557,0.086760163,0.084680258,0.082650589,0.080669934,0.078737101,0.076850927,0.075010278,0.073214047,0.071461154,0.069750546,0.067910844,0.066119961,0.064376596,0.062679479,0.061027377,0.059419089,0.057853445,0.056329308,0.054845572,0.053401159,0.05199502,0.050626136,0.049155082,0.047726988,0.046340594,0.044994677,0.043688048,0.042419556,0.041188081,0.039992539,0.038831875,0.037705067,0.036611122,0.035549078,0.034428369,0.033343151,0.032292293,0.031274705,0.030289326,0.029335135,0.028411139,0.027516378,0.026649923,0.025810876,0.024998365,0.024211546,0.023367992,0.022554056,0.021768691,0.021010886,0.020279667,0.019574094,0.018893261,0.018236293,0.017602348,0.016990613,0.016400304,0.015830664,0.015230448,0.014653091,0.014097718,0.013563488,0.013049593,0.012555256,0.012079729,0.011622293,0.011182256,0.010758955,0.010351749,0.009960025,0.00958319,0.009220676,0.008871938,0.008536448,0.008213701,0.007903212,0.007604512,0.007317152,0.0070407,0.006774739,0.00651887,0.006272709,0.006035884,0.005808041,0.005588837,0.005377943,0.005175043,0.004979832,0.004792018,0.00461132,0.004437466,0.004270195,0.004109259,0.003954415,0.003805432,0.003662087,0.003524166,0.003391463,0.003263779,0.003140924,0.003022714,0.002908973,0.002799531,0.002694225,0.002592898,0.002495399,0.002401582,0.002311308,0.002224443,0.002140857) + + output <- f_surv_getgpopOSFast(p_male, start_age, t_yr, R_table_mort_lifeTable) + + testthat::expect_equal(target, output) + +}) + + +## Testing Tristan's functions ---- + +testthat::test_that("a single survival curve is adjusted", { + + v_x_lifetable <- seq(0, 100, by = 1) + v_h_lifetable <- exp(0.1 * v_x_lifetable - 10) + v_q_lifetable <- 1 - exp(-v_h_lifetable) + + v_t_os <- seq(0, 15, by = 1/52) + # h_{OS}(t) = 0.16 * (t/5) ^ (-0.2) + v_p_os <- pweibull(v_t_os, shape = 0.8, scale = 5, lower.tail = FALSE) + + # For a 60-year-old, the hazard from the OS curve exceeds the general + # mortality hazard for the full 15 years, so adjustment should have no effect + x <- adjust_single_survival(60, 1/52, v_t_os, v_p_os, v_x_lifetable, v_q_lifetable) + testthat::expect_equal(x$s_adjusted, v_p_os) + + # For a 70-year-old, the hazard from general mortality exceeds the hazard from + # the OS curve at a particular point. By graphical inspection this should be + # when t = 11, so we should get a warning and survival should be adjusted + # downwards. + testthat::expect_warning( + x <- adjust_single_survival(70, 1/52, v_t_os, v_p_os, v_x_lifetable, v_q_lifetable), + "Mortality rate from life table exceeds extrapolated mortality at time 11" + ) + testthat::expect_equal(x$s_adjusted[1:(11 * 52)], v_p_os[1:(11 * 52)]) + testthat::expect_lt(x$s_adjusted[11 * 52 + 1], v_p_os[11 * 52 + 1]) + testthat::expect_equal(x$q_adjusted[11 * 52 + 1], 1 - exp(-v_h_lifetable[v_x_lifetable == 81] / 52)) + +}) + + +testthat::test_that("the wrapper function works with aggregate patient characteristics", { + + # Create i and p in the global environment with the form we want, but make + # sure we don't clobber the real things if they are already loaded + old_i <- if ("i" %in% names(globalenv())) get("i", envir = globalenv()) else NULL + old_p <- if ("p" %in% names(globalenv())) get("p", envir = globalenv()) else NULL + + assign("i", list(R_table_mort_lifeTable = data.frame( + V1 = 0:100, + V2 = c(0.004233,0.000229,0.000127,0.000102,0.000086,0.000074,0.000085,0.000067,0.000069,0.00006,0.000078,0.000077,0.000102,0.000116,0.000129,0.000172,0.000205,0.000311,0.000402,0.000454,0.000525,0.000507,0.000498,0.000524,0.000556,0.000601,0.000607,0.00063,0.000681,0.000728,0.000771,0.000835,0.000859,0.000958,0.00099,0.0011,0.001155,0.001352,0.001317,0.001458,0.001607,0.001701,0.00185,0.002017,0.002212,0.00247,0.00265,0.002746,0.002964,0.003303,0.003584,0.003828,0.004084,0.004411,0.004731,0.005059,0.005609,0.006078,0.006718,0.007265,0.007944,0.008673,0.009648,0.010608,0.011234,0.012538,0.01395,0.014892,0.016483,0.018111,0.019424,0.021013,0.023044,0.026136,0.029196,0.03283,0.03638,0.040958,0.046233,0.051555,0.057764,0.064015,0.071677,0.080609,0.09128,0.10247,0.11665,0.130842,0.147828,0.16745,0.178043,0.201824,0.223296,0.250961,0.27869,0.310738,0.339857,0.372549,0.403135,0.453901,0.485254), + V3 = c(0.003503,0.000214,0.000114,0.000095,0.000064,0.000074,0.000071,0.000055,0.000058,0.000051,0.000066,0.000055,0.000057,0.000087,0.000096,0.000113,0.000131,0.000158,0.000218,0.000212,0.000187,0.000211,0.000245,0.000215,0.000223,0.00026,0.000257,0.00031,0.000314,0.000338,0.000387,0.000394,0.000468,0.000493,0.000585,0.000592,0.000678,0.000761,0.000792,0.000868,0.000924,0.001004,0.001102,0.001239,0.001345,0.001484,0.001625,0.001744,0.001966,0.002051,0.002234,0.002452,0.002581,0.002764,0.002964,0.003283,0.003637,0.003928,0.004367,0.004707,0.005247,0.005636,0.006451,0.006818,0.007379,0.008113,0.00877,0.009554,0.010602,0.011458,0.012895,0.013637,0.015499,0.017289,0.019688,0.021766,0.024397,0.027918,0.03142,0.035713,0.039611,0.045127,0.050197,0.057155,0.065142,0.073748,0.08452,0.095918,0.107798,0.121609,0.136466,0.153438,0.171026,0.189568,0.20787,0.230227,0.253171,0.277939,0.299649,0.31991,0.350742) + )), envir = globalenv()) + assign("p", list(basic = list(cl_y = 1/52)), envir = globalenv()) + + # Create a survivor function to use + t_os <- (0:208) * p$basic$cl_y + s_os <- exp(-0.1 * t_os ^ 0.5) + + # Test using mean age and proportion male + testthat::expect_warning( + adjusted <- adjust_survival(sex = 0.55, age = 75.2, survivor = cbind(t_os, s_os), .warn = TRUE) + ) + + # The values below were calculated in Excel + target <- data.frame( + t = t_os, + q_genmort = c(0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000543354,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000605518,0.000687121,0.000687116,0.000687109,0.000687103,0.000687097,0.00068709,0.000687083,0.000687076,0.000687068,0.00068706,0.000687053,0.000687044,0.000687036,0.000687028,0.000687019,0.00068701,0.000687001,0.000686992,0.000686982,0.000686973,0.000686963,0.000686953,0.000686942,0.000686932,0.000686921,0.000686911,0.0006869,0.000686889,0.000686877,0.000686866,0.000686854,0.000686843,0.000686831,0.000686819,0.000686806,0.000686794,0.000686782,0.000686769,0.000686756,0.000686743,0.00068673,0.000686717,0.000686703,0.00068669,0.000686676,0.000686662,0.000686648,0.000686634,0.00068662,0.000686606,0.000686591,0.000686576,0.000775984,0.000775962,0.00077594,0.000775919,0.000775897,0.000775875,0.000775853,0.000775832,0.00077581,0.000775788,0.000775766,0.000775745,0.000775723,0.000775701,0.000775679,0.000775658,0.000775636,0.000775614,0.000775592,0.000775571,0.000775549,0.000775527,0.000775505,0.000775484,0.000775462,0.00077544,0.000775418,0.000775396,0.000775375,0.000775353,0.000775331,0.000775309,0.000775288,0.000775266,0.000775244,0.000775222,0.000775201,0.000775179,0.000775157,0.000775135,0.000775114,0.000775092,0.00077507,0.000775048,0.000775026,0.000775005,0.000774983,0.000774961,0.000774939,0.000774918,0.000774896,0.000774874,0.000872263,0.000872238,0.000872213,0.000872188,0.000872163,0.000872138,0.000872112,0.000872087,0.000872062,0.000872037,NA_real_), + q_adjusted = c(0.013771794,0.005727643,0.00439791,0.003708892,0.003268321,0.002955252,0.002717958,0.002530052,0.002376459,0.002247856,0.002138121,0.002043046,0.001959631,0.001885672,0.001819506,0.001759854,0.001705711,0.00165628,0.001610913,0.001569083,0.001530351,0.001494355,0.001460786,0.001429382,0.001399922,0.001372211,0.001346084,0.001321396,0.001298018,0.001275839,0.001254759,0.001234692,0.001215557,0.001197286,0.001179815,0.001163087,0.001147051,0.001131661,0.001116875,0.001102653,0.001088961,0.001075768,0.001063042,0.001050758,0.00103889,0.001027415,0.001016313,0.001005563,0.000995147,0.000985048,0.00097525,0.00096574,0.000956502,0.000947524,0.000938795,0.000930302,0.000922036,0.000913987,0.000906145,0.000898501,0.000891047,0.000883776,0.000876681,0.000869753,0.000862987,0.000856377,0.000849916,0.000843599,0.000837421,0.000831377,0.000825462,0.000819672,0.000814001,0.000808447,0.000803005,0.000797671,0.000792443,0.000787315,0.000782286,0.000777353,0.000772511,0.000767759,0.000763093,0.000758512,0.000754012,0.000749591,0.000745247,0.000740978,0.000736781,0.000732655,0.000728597,0.000724606,0.00072068,0.000716817,0.00076301,0.000761325,0.000759665,0.00075803,0.00075642,0.000754834,0.000753272,0.000751732,0.000750215,0.000748719,0.000747244,0.00074579,0.000744357,0.000742942,0.000741548,0.000740171,0.000738814,0.000737474,0.000736152,0.000734846,0.000733558,0.000732286,0.00073103,0.000729789,0.000728564,0.000727354,0.000726158,0.000724977,0.00072381,0.000722657,0.000721517,0.000720391,0.000719277,0.000718176,0.000717088,0.000716011,0.000714947,0.000713894,0.000712853,0.000711823,0.000710804,0.000709796,0.000708799,0.000707812,0.000706835,0.000705869,0.000704912,0.000703965,0.000703027,0.000702099,0.00070118,0.00070027,0.000775984,0.000775962,0.00077594,0.000775919,0.000775897,0.000775875,0.000775853,0.000775832,0.00077581,0.000775788,0.000775766,0.000775745,0.000775723,0.000775701,0.000775679,0.000775658,0.000775636,0.000775614,0.000775592,0.000775571,0.000775549,0.000775527,0.000775505,0.000775484,0.000775462,0.00077544,0.000775418,0.000775396,0.000775375,0.000775353,0.000775331,0.000775309,0.000775288,0.000775266,0.000775244,0.000775222,0.000775201,0.000775179,0.000775157,0.000775135,0.000775114,0.000775092,0.00077507,0.000775048,0.000775026,0.000775005,0.000774983,0.000774961,0.000774939,0.000774918,0.000774896,0.000774874,0.000872263,0.000872238,0.000872213,0.000872188,0.000872163,0.000872138,0.000872112,0.000872087,0.000872062,0.000872037,NA_real_), + s_genmort = c(1,0.999456646,0.998913598,0.998370858,0.997828424,0.997286297,0.996744476,0.996202961,0.995661752,0.995120849,0.994580252,0.99403996,0.993499974,0.992960292,0.992420916,0.991881844,0.991343077,0.990804614,0.990266455,0.9897286,0.98919105,0.988653802,0.988116859,0.987580219,0.987043881,0.986507847,0.985972116,0.985436687,0.984901561,0.984366737,0.983832214,0.983297994,0.982764076,0.982230459,0.981697144,0.981164129,0.980631416,0.980099004,0.979566892,0.979035081,0.97850357,0.977972359,0.977441448,0.976850118,0.976259159,0.975668572,0.975078355,0.974488509,0.973899034,0.973309928,0.972721193,0.972132827,0.971544831,0.970957204,0.970369946,0.969783057,0.969196536,0.968610384,0.9680246,0.967439183,0.966854134,0.966269452,0.965685138,0.96510119,0.964517609,0.963934395,0.963351546,0.962769063,0.962186946,0.961605195,0.961023808,0.960442787,0.95986213,0.959281838,0.95870191,0.958122346,0.957543146,0.956964309,0.956385836,0.955807726,0.955229978,0.954652593,0.954075571,0.95349891,0.952922612,0.952346675,0.9517711,0.951195885,0.950621032,0.95004654,0.949472408,0.948898636,0.948325224,0.947752172,0.947179479,0.946529964,0.945880911,0.945232318,0.944584185,0.943936513,0.943289301,0.942642548,0.941996255,0.94135042,0.940705044,0.940060126,0.939415666,0.938771663,0.938128118,0.937485029,0.936842397,0.936200222,0.935558502,0.934917237,0.934276428,0.933636073,0.932996173,0.932356728,0.931717736,0.931079197,0.930441112,0.92980348,0.9291663,0.928529572,0.927893297,0.927257472,0.926622099,0.925987177,0.925352706,0.924718684,0.924085113,0.923451991,0.922819318,0.922187094,0.921555319,0.920923992,0.920293113,0.919662681,0.919032697,0.918403159,0.917774069,0.917145424,0.916517225,0.915889473,0.915262165,0.914635302,0.914008884,0.913301397,0.912594477,0.911888124,0.911182337,0.910477117,0.909772462,0.909068373,0.908364848,0.907661888,0.906959491,0.906257657,0.905556387,0.904855679,0.904155533,0.903455948,0.902756924,0.902058461,0.901360558,0.900663214,0.89996643,0.899270204,0.898574537,0.897879427,0.897184875,0.896490879,0.89579744,0.895104556,0.894412228,0.893720456,0.893029237,0.892338573,0.891648462,0.890958905,0.8902699,0.889581448,0.888893547,0.888206197,0.887519399,0.886833151,0.886147453,0.885462304,0.884777704,0.884093653,0.88341015,0.882727195,0.882044787,0.881362926,0.880681612,0.880000843,0.879320619,0.878640941,0.877961807,0.877197821,0.876434522,0.875671909,0.874909982,0.87414874,0.873388182,0.872628308,0.871869117,0.871110609,0.870352783), + s_adjusted = c(1,0.986228206,0.980579443,0.976266943,0.972646074,0.969467155,0.966602135,0.963974951,0.961536044,0.959250993,0.957094735,0.955048351,0.953097143,0.951229425,0.949435718,0.947708214,0.946040386,0.944426714,0.942862479,0.94134361,0.939866564,0.938428238,0.937025893,0.935657099,0.934319687,0.933011713,0.931731423,0.930477234,0.929247706,0.928041526,0.926857495,0.925694512,0.924551565,0.923427719,0.922322112,0.921233943,0.920162467,0.919106994,0.918066876,0.91704151,0.916030331,0.91503281,0.914048447,0.913076775,0.912117353,0.911169763,0.910233613,0.909308531,0.908394165,0.90749018,0.906596258,0.9057121,0.904837418,0.903971939,0.903115404,0.902267564,0.901428182,0.900597033,0.899773899,0.898958573,0.898150858,0.897350563,0.896557506,0.895771512,0.894992412,0.894220045,0.893454256,0.892694895,0.891941818,0.891194887,0.890453968,0.889718932,0.888989654,0.888266015,0.887547899,0.886835194,0.886127791,0.885425586,0.884728477,0.884036366,0.883349158,0.882666761,0.881989085,0.881316046,0.880647557,0.879983538,0.879323911,0.878668597,0.878017524,0.877370617,0.876727807,0.876089026,0.875454206,0.874823284,0.874196196,0.873529175,0.872864135,0.872201051,0.871539897,0.870880646,0.870223276,0.869567761,0.868914079,0.868262207,0.867612123,0.866963805,0.866317231,0.865672382,0.865029238,0.864387777,0.863747982,0.863109833,0.862473312,0.861838401,0.861205082,0.860573339,0.859943153,0.859314509,0.858687391,0.858061782,0.857437667,0.856815032,0.85619386,0.855574139,0.854955852,0.854338987,0.853723529,0.853109465,0.852496782,0.851885468,0.851275508,0.850666891,0.850059605,0.849453638,0.848848977,0.848245612,0.84764353,0.847042721,0.846443174,0.845844879,0.845247823,0.844651998,0.844057393,0.843463997,0.842871802,0.842280797,0.841690973,0.841037835,0.840385221,0.839733132,0.839081568,0.838430527,0.83778001,0.837130016,0.836480544,0.835831594,0.835183166,0.834535259,0.833887873,0.833241007,0.832594661,0.831948835,0.831303527,0.830658738,0.830014468,0.829370715,0.82872748,0.828084761,0.827442559,0.826800873,0.826159702,0.825519047,0.824878907,0.824239281,0.823600168,0.82296157,0.822323484,0.821685911,0.82104885,0.820412301,0.819776263,0.819140736,0.81850572,0.817871214,0.817237218,0.81660373,0.815970752,0.815338282,0.81470632,0.814074866,0.813443918,0.812813478,0.812183543,0.811554115,0.810925192,0.810296774,0.809668861,0.809041452,0.808414547,0.807709396,0.807004881,0.806301001,0.805597755,0.804895142,0.804193163,0.803491816,0.802791101,0.802091017,0.801391564), + prop_male.genmort = c(0.55,0.549945861,0.54989172,0.549837578,0.549783435,0.549729291,0.549675146,0.549620999,0.549566852,0.549512703,0.549458553,0.549404401,0.549350249,0.549296096,0.549241941,0.549187785,0.549133628,0.54907947,0.54902531,0.54897115,0.548916988,0.548862825,0.548808661,0.548754496,0.54870033,0.548646162,0.548591994,0.548537824,0.548483653,0.548429481,0.548375308,0.548321133,0.548266958,0.548212781,0.548158603,0.548104424,0.548050244,0.547996063,0.547941881,0.547887697,0.547833513,0.547779327,0.54772514,0.547666264,0.547607386,0.547548507,0.547489627,0.547430745,0.547371863,0.547312978,0.547254093,0.547195206,0.547136318,0.547077428,0.547018537,0.546959645,0.546900752,0.546841857,0.546782961,0.546724064,0.546665165,0.546606265,0.546547364,0.546488461,0.546429557,0.546370652,0.546311745,0.546252838,0.546193929,0.546135018,0.546076106,0.546017193,0.545958279,0.545899364,0.545840447,0.545781529,0.545722609,0.545663689,0.545604767,0.545545843,0.545486919,0.545427993,0.545369066,0.545310138,0.545251208,0.545192277,0.545133345,0.545074412,0.545015477,0.544956541,0.544897604,0.544838665,0.544779726,0.544720785,0.544661843,0.544597431,0.544533017,0.544468602,0.544404186,0.544339768,0.544275348,0.544210927,0.544146505,0.544082081,0.544017656,0.543953229,0.543888801,0.543824371,0.54375994,0.543695507,0.543631073,0.543566637,0.5435022,0.543437762,0.543373322,0.54330888,0.543244437,0.543179993,0.543115547,0.5430511,0.542986651,0.542922201,0.54285775,0.542793297,0.542728842,0.542664387,0.542599929,0.542535471,0.542471011,0.542406549,0.542342086,0.542277622,0.542213156,0.542148689,0.54208422,0.54201975,0.541955279,0.541890806,0.541826332,0.541761856,0.541697379,0.541632901,0.541568421,0.541503939,0.541439457,0.541374973,0.541310488,0.541236898,0.541163306,0.541089713,0.541016118,0.540942521,0.540868922,0.540795322,0.54072172,0.540648116,0.54057451,0.540500902,0.540427293,0.540353682,0.540280069,0.540206455,0.540132838,0.54005922,0.5399856,0.539911979,0.539838355,0.53976473,0.539691103,0.539617475,0.539543845,0.539470213,0.539396579,0.539322943,0.539249306,0.539175667,0.539102027,0.539028385,0.538954741,0.538881095,0.538807447,0.538733798,0.538660148,0.538586495,0.538512841,0.538439185,0.538365528,0.538291868,0.538218208,0.538144545,0.538070881,0.537997215,0.537923548,0.537849878,0.537776208,0.537702535,0.537628861,0.537555185,0.537481508,0.537402314,0.537323119,0.537243921,0.537164722,0.537085521,0.537006318,0.536927113,0.536847906,0.536768697,0.536689487 ), + prop_male.adjusted = c(0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.549977485,0.549954043,0.549929689,0.549904436,0.549878299,0.54985129,0.549823422,0.549794709,0.549765163,0.549734795,0.549703618,0.549671642,0.54963888,0.549605342,0.549571039,0.549535981,0.549500179,0.549463643,0.549426381,0.549388405,0.549349723,0.549310345,0.549270279,0.549229533,0.549188118,0.549146041,0.54910331,0.549059933,0.549015919,0.548971275,0.548926008,0.548880126,0.548833636,0.548786546,0.548738862,0.548690591,0.548641739,0.548592315,0.548542323,0.548491771,0.548440664,0.548389008,0.54833681,0.548284076,0.548230811,0.548177021,0.548122711,0.548067887,0.548012555,0.547956719,0.547900385,0.547843558,0.547770141,0.547696721,0.5476233,0.547549877,0.547476451,0.547403023,0.547329594,0.547256162,0.547182728,0.547109292,0.547035855,0.546962415,0.546888973,0.546815529,0.546742083,0.546668635,0.546595185,0.546521732,0.546448278,0.546374822,0.546301364,0.546227904,0.546154441,0.546080977,0.546007511,0.545934043,0.545860572,0.5457871,0.545713626,0.54564015,0.545566672,0.545493191,0.545419709,0.545346225,0.545272739,0.545199251,0.545125761,0.545052269,0.544978775,0.544905279,0.544831781,0.544758281,0.544684779,0.544611275,0.544537769,0.544464262,0.544390752,0.544317241,0.544243727,0.544170212,0.544096695,0.544023175,0.543944151,0.543865125,0.543786097,0.543707066,0.543628033,0.543548998,0.543469961,0.543390922,0.54331188,0.543232837) + ) + + testthat::expect_equal(adjusted, target, tolerance = 1e-6) + + # Restore or remove i and p + if (!is.null(old_i)) assign("i", old_i, envir = globalenv()) else rm("i", envir = globalenv()) + if (!is.null(old_p)) assign("p", old_p, envir = globalenv()) else rm("p", envir = globalenv()) + +}) + + +testthat::test_that("the wrapper function works with individual patient characteristics", { + + i <- list(R_table_mort_lifeTable = data.frame( + V1 = 0:100, + V2 = c(0.004233,0.000229,0.000127,0.000102,0.000086,0.000074,0.000085,0.000067,0.000069,0.00006,0.000078,0.000077,0.000102,0.000116,0.000129,0.000172,0.000205,0.000311,0.000402,0.000454,0.000525,0.000507,0.000498,0.000524,0.000556,0.000601,0.000607,0.00063,0.000681,0.000728,0.000771,0.000835,0.000859,0.000958,0.00099,0.0011,0.001155,0.001352,0.001317,0.001458,0.001607,0.001701,0.00185,0.002017,0.002212,0.00247,0.00265,0.002746,0.002964,0.003303,0.003584,0.003828,0.004084,0.004411,0.004731,0.005059,0.005609,0.006078,0.006718,0.007265,0.007944,0.008673,0.009648,0.010608,0.011234,0.012538,0.01395,0.014892,0.016483,0.018111,0.019424,0.021013,0.023044,0.026136,0.029196,0.03283,0.03638,0.040958,0.046233,0.051555,0.057764,0.064015,0.071677,0.080609,0.09128,0.10247,0.11665,0.130842,0.147828,0.16745,0.178043,0.201824,0.223296,0.250961,0.27869,0.310738,0.339857,0.372549,0.403135,0.453901,0.485254), + V3 = c(0.003503,0.000214,0.000114,0.000095,0.000064,0.000074,0.000071,0.000055,0.000058,0.000051,0.000066,0.000055,0.000057,0.000087,0.000096,0.000113,0.000131,0.000158,0.000218,0.000212,0.000187,0.000211,0.000245,0.000215,0.000223,0.00026,0.000257,0.00031,0.000314,0.000338,0.000387,0.000394,0.000468,0.000493,0.000585,0.000592,0.000678,0.000761,0.000792,0.000868,0.000924,0.001004,0.001102,0.001239,0.001345,0.001484,0.001625,0.001744,0.001966,0.002051,0.002234,0.002452,0.002581,0.002764,0.002964,0.003283,0.003637,0.003928,0.004367,0.004707,0.005247,0.005636,0.006451,0.006818,0.007379,0.008113,0.00877,0.009554,0.010602,0.011458,0.012895,0.013637,0.015499,0.017289,0.019688,0.021766,0.024397,0.027918,0.03142,0.035713,0.039611,0.045127,0.050197,0.057155,0.065142,0.073748,0.08452,0.095918,0.107798,0.121609,0.136466,0.153438,0.171026,0.189568,0.20787,0.230227,0.253171,0.277939,0.299649,0.31991,0.350742) + )) + p <- list(basic = list(cl_y = 1/52)) + + # Create a survivor function to use + t_os <- (0:208) * p$basic$cl_y + s_os <- exp(-0.05 * t_os ^ 0.5) + + # Test using individual ages and sexes + ages <- c(57.5, 72.6, 67.9, 71.1) + sexes <- c("M", "M", "F", "F") + testthat::expect_warning( + adjusted <- adjust_survival(sex = sexes, age = ages, survivor = data.frame(t = t_os, s = s_os), .i = i, .p = p, .warn = TRUE) + ) + + # The values below were calculated in Excel + target <- data.frame( + t = t_os, + q_genmort = c(0.000253523,0.000253523,0.000253523,0.000253523,0.000253523,0.000253523,0.000258611,0.000258611,0.000258611,0.000258611,0.000258611,0.000258611,0.000258611,0.000258611,0.000258611,0.000258611,0.000258611,0.000258611,0.000258611,0.000258611,0.000258611,0.000273844,0.000273844,0.000273844,0.000273844,0.000273844,0.000276941,0.000276941,0.000276941,0.000276941,0.000276941,0.000276941,0.000276941,0.000276941,0.000276941,0.000276941,0.000276941,0.000276941,0.000276941,0.000276941,0.000276941,0.000276941,0.000276941,0.000276941,0.000276941,0.000276941,0.000276941,0.000286022,0.000286022,0.000286021,0.00028602,0.000286019,0.000286018,0.000286016,0.000286014,0.000286012,0.000286009,0.000286007,0.000290164,0.000290161,0.000290158,0.000290155,0.000290151,0.000290147,0.000290143,0.000290139,0.000290134,0.00029013,0.000290125,0.00029012,0.000290115,0.00029011,0.000290104,0.000305203,0.000305192,0.000305181,0.00030517,0.000305158,0.000307796,0.000307785,0.000307773,0.000307761,0.000307749,0.000307736,0.000307724,0.000307711,0.000307699,0.000307686,0.000307673,0.00030766,0.000307646,0.000307633,0.000307619,0.000307606,0.000307592,0.000307578,0.000307564,0.00030755,0.000307536,0.000316282,0.000316268,0.000316254,0.00031624,0.000316225,0.000316211,0.000316196,0.000316182,0.000316167,0.000316152,0.000316137,0.00032313,0.000323116,0.000323101,0.000323086,0.000323071,0.000323056,0.000323041,0.000323026,0.00032301,0.000322995,0.000322979,0.000322964,0.000322948,0.000322932,0.000322916,0.000340747,0.000340723,0.000340698,0.000340673,0.000340648,0.000343924,0.000343899,0.000343874,0.000343849,0.000343824,0.000343799,0.000343774,0.000343748,0.000343723,0.000343698,0.000343672,0.000343647,0.000343621,0.000343595,0.000343569,0.000343543,0.000343517,0.000343491,0.000343465,0.000343439,0.000343413,0.000355186,0.00035516,0.000355134,0.000355108,0.000355081,0.000355055,0.000355029,0.000355002,0.000354976,0.000354949,0.000354922,0.000358535,0.000358509,0.000358483,0.000358456,0.00035843,0.000358403,0.000358376,0.00035835,0.000358323,0.000358296,0.000358269,0.000358242,0.000358215,0.000358188,0.000358161,0.000375405,0.000375367,0.00037533,0.000375293,0.000375255,0.000378785,0.000378748,0.000378711,0.000378674,0.000378637,0.0003786,0.000378563,0.000378525,0.000378488,0.000378451,0.000378414,0.000378376,0.000378339,0.000378302,0.000378264,0.000378227,0.000378189,0.000378152,0.000378114,0.000378077,0.000378039,0.000388252,0.000388214,0.000388176,0.000388138,0.0003881,NA_real_ ), + q_adjusted = c(0.006909769,0.002867934,0.002201378,0.001856169,0.001635498,0.001478719,0.001359904,0.001265827,0.001188936,0.00112456,0.001069632,0.001022045,0.000980296,0.000943281,0.000910167,0.000880314,0.00085322,0.000828483,0.000805781,0.000784849,0.000765469,0.000747457,0.00073066,0.000714947,0.000700206,0.000686341,0.000673269,0.000660916,0.00064922,0.000638123,0.000627577,0.000617536,0.000607963,0.000598822,0.000590082,0.000581713,0.00057369,0.000565991,0.000558593,0.000551479,0.000544629,0.000538029,0.000531662,0.000525517,0.00051958,0.00051384,0.000508507,0.000504473,0.000500565,0.000496777,0.000493101,0.000489532,0.000486066,0.000482698,0.000479422,0.000476236,0.000473134,0.000470113,0.00046717,0.000464302,0.000461505,0.000458776,0.000456112,0.000453512,0.000450972,0.000448491,0.000446065,0.000443693,0.000441374,0.000439104,0.000436883,0.000434708,0.000432578,0.000445596,0.000443548,0.000441541,0.000439573,0.000437643,0.00043575,0.000433891,0.000432068,0.000430277,0.000428519,0.000426793,0.000425096,0.000423429,0.000421791,0.000420181,0.000418597,0.00041704,0.000415508,0.000414001,0.000412519,0.000411059,0.000409623,0.000408209,0.000406816,0.000405445,0.000404094,0.000402764,0.000401453,0.000400161,0.000398887,0.000397632,0.000396394,0.000395174,0.000393971,0.000393037,0.000392253,0.00039148,0.000390717,0.000389963,0.00038922,0.000388486,0.000387761,0.000387046,0.00038634,0.000385642,0.000384953,0.000384272,0.0003836,0.000382935,0.000382279,0.00038163,0.000380989,0.000398202,0.000397568,0.00039694,0.00039632,0.000395707,0.0003951,0.000394499,0.000393906,0.000393318,0.000392737,0.000392161,0.000391592,0.000391029,0.000390471,0.000389919,0.000389372,0.000388831,0.000388295,0.000387765,0.000387239,0.000386719,0.000386203,0.000385693,0.000385187,0.000384686,0.00038419,0.000395497,0.000395011,0.000394529,0.000394051,0.000393577,0.000393108,0.000392642,0.000392181,0.000391724,0.00039127,0.000390821,0.000390375,0.000389933,0.000389495,0.00038906,0.000388629,0.000388202,0.000387778,0.000387357,0.00038694,0.000386526,0.000386138,0.000385922,0.000385708,0.000385496,0.000385285,0.000402347,0.000402129,0.000401913,0.000401698,0.000401485,0.000401273,0.000401062,0.000400853,0.000400646,0.000400439,0.000400234,0.00040003,0.000399828,0.000399626,0.000399426,0.000399228,0.00039903,0.000398834,0.000398638,0.000398444,0.000398251,0.00039806,0.000397869,0.000397679,0.000397491,0.000397303,0.000407367,0.000407182,0.000406998,0.000406814,0.000406632,NA_real_), + s_genmort = c(1,0.999746477,0.999493034,0.999239671,0.998986387,0.998733183,0.998480058,0.99822193,0.997963882,0.997705917,0.997448032,0.997190229,0.996932508,0.996674867,0.996417308,0.99615983,0.995902433,0.995645118,0.995387883,0.99513073,0.994873657,0.994616666,0.994344666,0.994072761,0.993800951,0.993529237,0.993257618,0.992983007,0.992708492,0.992434074,0.992159751,0.991885524,0.991611393,0.991337357,0.991063418,0.990789574,0.990515825,0.990242173,0.989968616,0.989695154,0.989421788,0.989148518,0.988875343,0.988602263,0.988329279,0.98805639,0.987783596,0.987510897,0.987229324,0.986947852,0.986666479,0.986385206,0.986104034,0.985822962,0.985541989,0.985261117,0.984980344,0.984699672,0.984419099,0.984134514,0.983850031,0.983565649,0.983281369,0.98299719,0.982713113,0.982429136,0.982145261,0.981861488,0.981577815,0.981294244,0.981010773,0.980727404,0.980444136,0.980160968,0.979863313,0.979565774,0.979268352,0.978971047,0.978673858,0.978374163,0.978074585,0.977775125,0.977475781,0.977176554,0.976877444,0.976578451,0.976279574,0.975980814,0.975682171,0.975383644,0.975085234,0.974786941,0.974488763,0.974190702,0.973892758,0.973594929,0.973297217,0.972999621,0.972702141,0.972404777,0.972099025,0.971793393,0.971487883,0.971182494,0.970877227,0.97057208,0.970267055,0.96996215,0.969657367,0.969352704,0.969048162,0.96873691,0.968425781,0.968114777,0.967803896,0.967493139,0.967182506,0.966871996,0.96656161,0.966251347,0.965941208,0.965631192,0.9653213,0.965011531,0.964701885,0.964392363,0.964066087,0.963739954,0.963413965,0.96308812,0.962762417,0.962433625,0.962104977,0.961776472,0.961448112,0.961119896,0.960791823,0.960463894,0.960136108,0.959808467,0.959480968,0.959153613,0.958826402,0.958499333,0.958172408,0.957845626,0.957518987,0.957192491,0.956866138,0.956539928,0.95621386,0.955887935,0.955550928,0.955214072,0.954877366,0.954540811,0.954204406,0.953868151,0.953532046,0.953196092,0.952860287,0.952524632,0.952189128,0.951850287,0.951511598,0.95117306,0.950834674,0.950496439,0.950158355,0.949820422,0.94948264,0.949145009,0.948807529,0.9484702,0.948133022,0.947795994,0.947459116,0.94712239,0.94676981,0.946417402,0.946065166,0.945713102,0.945361209,0.945006042,0.944651048,0.944296226,0.943941576,0.943587099,0.943232794,0.942878661,0.9425247,0.942170911,0.941817293,0.941463847,0.941110573,0.940757471,0.940404539,0.94005178,0.939699191,0.939346773,0.938994526,0.938642451,0.938290546,0.937938811,0.937577695,0.937216757,0.936855997,0.936495415,0.936135011 ), + s_adjusted = c(1,0.993090231,0.990242113,0.988062216,0.986228206,0.984615232,0.983159262,0.98182226,0.980579443,0.979413596,0.978312187,0.977265752,0.976266943,0.975309912,0.974389921,0.973503063,0.972646074,0.971816193,0.97101106,0.970228638,0.969467155,0.968725058,0.968000978,0.967293698,0.966602135,0.965925314,0.96526236,0.964612479,0.963974951,0.963349119,0.962734384,0.962130195,0.961536044,0.960951466,0.960376026,0.959809326,0.959250993,0.95870068,0.958158064,0.957622843,0.957094735,0.956573473,0.956058809,0.955550509,0.955048351,0.954552127,0.95406164,0.953576493,0.953095439,0.952618353,0.952145114,0.951675611,0.951209735,0.950747384,0.950288461,0.949832871,0.949380527,0.948931343,0.948485237,0.948042133,0.947601955,0.947164633,0.946730097,0.946298282,0.945869124,0.945442563,0.945018541,0.944597002,0.94417789,0.943761155,0.943346746,0.942934614,0.942524713,0.942116998,0.941697195,0.941279506,0.940863893,0.940450314,0.940038733,0.939629111,0.939221414,0.938815607,0.938411656,0.938009528,0.937609193,0.937210618,0.936813776,0.936418636,0.936025172,0.935633354,0.935243158,0.934854557,0.934467526,0.93408204,0.933698077,0.933315613,0.932934625,0.932555092,0.932176992,0.931800305,0.931425009,0.931051086,0.930678516,0.93030728,0.929937361,0.929568739,0.929201397,0.928835319,0.928470252,0.928106057,0.927742722,0.927380238,0.927018594,0.92665778,0.926297786,0.925938603,0.925580223,0.925222634,0.924865829,0.924509799,0.924154536,0.92380003,0.923446275,0.923093261,0.92274098,0.922389426,0.922022129,0.921655563,0.921289721,0.920924595,0.920560179,0.920196466,0.919833449,0.919471121,0.919109477,0.918748509,0.918388211,0.918028577,0.917669602,0.917311279,0.916953602,0.916596565,0.916240164,0.915884393,0.915529245,0.915174716,0.914820801,0.914467494,0.914114791,0.913762685,0.913411174,0.91306025,0.912699137,0.912338611,0.911978668,0.911619302,0.911260509,0.910902286,0.910544627,0.910187529,0.909830987,0.909474997,0.909119555,0.908764657,0.908410299,0.908056478,0.907703189,0.907350429,0.906998194,0.90664648,0.906295284,0.905944603,0.905594432,0.905244747,0.904895393,0.904546367,0.904197668,0.903849294,0.903485634,0.903122316,0.902759339,0.902396703,0.902034404,0.901672442,0.901310815,0.900949522,0.90058856,0.900227929,0.899867627,0.899507653,0.899148005,0.898788682,0.898429682,0.898071004,0.897712647,0.897354609,0.896996889,0.896639486,0.896282398,0.895925624,0.895569163,0.895213014,0.894857175,0.894501645,0.894137255,0.893773178,0.893409415,0.893045963,0.892682822), + prop_male.genmort = c(0.5,0.499985389,0.499970785,0.499956187,0.499941596,0.499927011,0.499912432,0.499900406,0.499888386,0.499876372,0.499864366,0.499852366,0.499840373,0.499828386,0.499816406,0.499804432,0.499792466,0.499780505,0.499768552,0.499756605,0.499744665,0.499732731,0.499713212,0.499693702,0.499674202,0.499654711,0.499635229,0.499614201,0.499593182,0.499572172,0.499551171,0.499530178,0.499509194,0.499488219,0.499467253,0.499446295,0.499425347,0.499404407,0.499383476,0.499362553,0.49934164,0.499320735,0.499299839,0.499278952,0.499258074,0.499237204,0.499216343,0.499195492,0.499179184,0.499162884,0.499146593,0.499130311,0.499114037,0.499097771,0.499081514,0.499065265,0.499049025,0.499032793,0.49901657,0.49900244,0.498988318,0.498974206,0.498960101,0.498946006,0.498931919,0.49891784,0.498903771,0.498889709,0.498875657,0.498861613,0.498847578,0.498833551,0.498819533,0.498805524,0.498784061,0.498762609,0.49874117,0.498719742,0.498698326,0.498675578,0.498652841,0.498630115,0.4986074,0.498584697,0.498562005,0.498539324,0.498516654,0.498493995,0.498471347,0.49844871,0.498426085,0.498403471,0.498380868,0.498358276,0.498335695,0.498313125,0.498290567,0.49826802,0.498245483,0.498222958,0.498204803,0.498186659,0.498168525,0.498150402,0.498132289,0.498114188,0.498096097,0.498078017,0.498059947,0.498041888,0.49802384,0.498009315,0.4979948,0.497980297,0.497965805,0.497951324,0.497936853,0.497922394,0.497907946,0.497893509,0.497879084,0.497864669,0.497850265,0.497835872,0.497821491,0.49780712,0.49778397,0.497760834,0.497737714,0.497714609,0.497691519,0.497666757,0.497642009,0.497617276,0.497592558,0.497567853,0.497543164,0.497518488,0.497493828,0.497469181,0.497444549,0.497419932,0.497395329,0.49737074,0.497346166,0.497321607,0.497297062,0.497272531,0.497248015,0.497223513,0.497199025,0.497174553,0.497155934,0.497137329,0.497118738,0.497100161,0.497081598,0.497063048,0.497044513,0.497025991,0.497007483,0.496988988,0.496970508,0.496953861,0.496937228,0.496920609,0.496904005,0.496887414,0.496870837,0.496854275,0.496837726,0.496821191,0.496804671,0.496788164,0.496771672,0.496755193,0.496738729,0.496722278,0.496697335,0.49667241,0.496647503,0.496622616,0.496597747,0.49657106,0.496544392,0.496517741,0.496491108,0.496464493,0.496437895,0.496411315,0.496384753,0.496358208,0.496331681,0.496305172,0.49627868,0.496252206,0.49622575,0.496199311,0.49617289,0.496146487,0.496120102,0.496093734,0.496067384,0.496041051,0.49601979,0.495998546,0.495977319,0.495956109,0.495934915 ), + prop_male.adjusted = c(0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.499999889,0.499999106,0.499997671,0.499995604,0.499992925,0.49998965,0.499985798,0.499981384,0.499976424,0.499970933,0.499964925,0.499958414,0.499951412,0.499943933,0.499935988,0.499927588,0.499918745,0.499909469,0.49989977,0.499889658,0.499879143,0.499868233,0.499856937,0.499845264,0.499833222,0.499820819,0.499808061,0.499787399,0.499766398,0.499745066,0.499723408,0.499701433,0.499679145,0.499656551,0.499633656,0.499610467,0.499586989,0.499563227,0.499539186,0.499514872,0.499490288,0.499465441,0.499440334,0.499414973,0.499389361,0.499363502,0.499337401,0.499311062,0.499284489,0.499257686,0.499230655,0.499203401,0.499175928,0.499148239,0.499120337,0.499092225,0.499063907,0.499035385,0.499006664,0.498977745,0.498948631,0.498919453,0.498890279,0.49886111,0.498831945,0.498802785,0.498773629,0.498744477,0.498715331,0.498686189,0.498657051,0.498627919,0.498598791,0.498569669,0.498540551,0.498511438,0.49848233,0.498453228,0.49842413,0.498386082,0.498348042,0.49831001,0.498271986,0.49823397,0.498195962,0.498157962,0.498119971,0.498081987,0.498044013,0.498006046,0.497968088,0.497930139,0.497892198,0.497854265,0.497816342,0.497778427,0.49774052,0.497702623,0.497664734,0.497626854,0.497588984,0.497551122,0.497513269,0.497475425,0.49743759,0.497405636,0.497373691,0.497341754,0.497309826,0.497277907,0.497245997,0.497214095,0.497182203,0.497150319,0.497118445,0.497086579,0.497054723,0.497022875,0.496991037,0.496959208,0.496927388,0.496895577,0.496863775,0.496831983,0.496800199,0.496768425,0.496736672,0.496705024,0.496673479,0.496642038,0.496610699,0.496570764,0.496530934,0.496491207,0.496451584,0.496412063,0.496372645,0.496333327,0.49629411,0.496254993,0.496215975,0.496177056,0.496138235,0.496099511,0.496060884,0.496022352,0.495983917,0.495945576,0.49590733,0.495869178,0.495831119,0.495793152,0.495755278,0.495717495,0.495679803,0.495642202,0.495604691,0.49557235,0.495540099,0.495507934,0.495475858,0.495443868) + ) + + testthat::expect_equal(adjusted, target, tolerance = 1e-6) + +}) \ No newline at end of file diff --git a/tests/testthat/test-severity_modifier.R b/tests/testthat/test-severity_modifier.R new file mode 100644 index 0000000..de82576 --- /dev/null +++ b/tests/testthat/test-severity_modifier.R @@ -0,0 +1,79 @@ +source(here::here("3_Functions", "excel", "extract.R")) +source(here::here("3_Functions", "misc", "severity_modifier.R")) + +testthat::test_that("get_severity_modifier works", { + .i <- list( + R_s_num_qalyShort_abs_LB = 12, + R_s_num_qalyShort_abs_UB = 18, + R_s_num_qalyShort_prop_LB = 0.85, + R_s_num_qalyShort_prop_UB = 0.95, + R_s_num_qalyShort_w_LB = 1, + R_s_num_qalyShort_w_bet = 1.2, + R_s_num_qalyShort_w_UB = 1.7 + ) + + sm <- severity_modifier(.i) + + test_cases <- data.frame( + qalys_disease = c(0.8, 0.4, 0.6, 0.4, 3.1, 5.4, 3.5, 16.4, 9.0, 0.5), + qalys_nodisease = c(1.6, 4.1, 4.5, 10.3, 10.5, 11.2, 15.7, 20.5, 23.0, 28.1), + weights = c(1.0, 1.2, 1.2, 1.7, 1.0, 1.0, 1.2, 1.0, 1.2, 1.7) + ) + + testthat::expect_equal( + mapply( + FUN = get_severity_modifier, + qalys_disease = test_cases$qalys_disease, + qalys_nodisease = test_cases$qalys_nodisease, + MoreArgs = list(.severity_modifier = sm) + ), + test_cases$weights + ) +}) + +testthat::test_that("it calculates severity weights for aggregate data", { + i <- f_excel_extract(here::here("1_Data", "PATT RCC_model inputs.xlsx")) + + p <- list(basic = list(cl_y = 1/52, disc_q = 0.035)) + p <- add_population_utility_params(p, .i = i) + + # Modifier: 1.7x (absolute shortfall > 18) + sm1 <- calc_severity_modifier(5.0, 0.5, 2.1, .i = i, .p = p) + testthat::expect_equal(as.vector(sm1), 1.7) + + # Modifier: 1.2x (12 < abs. shortfall < 18 and 0.85 < prop. shortfall < 0.95) + sm2 <- calc_severity_modifier(40.8, 0.5, 2.5, .i = i, .p = p) + testthat::expect_equal(as.vector(sm2), 1.2) + + # Modifier: 1.0x (abs. shortfall < 12 and prop. shortfall < 0.85) + sm3 <- calc_severity_modifier(40.8, 0.5, 6.9, .i = i, .p = p) + testthat::expect_equal(as.vector(sm3), 1.0) + + # Modifier: 1.7x (prop. shortfall > 0.95) + sm4 <- calc_severity_modifier(60.0, 0.5, 0.3, .i = i, .p = p) + testthat::expect_equal(as.vector(sm4), 1.7) + +}) + +testthat::test_that("it calculates severity weights for IPD", { + i <- f_excel_extract(here::here("1_Data", "PATT RCC_model inputs.xlsx")) + + p <- list(basic = list(cl_y = 1/52, disc_q = 0.035)) + p <- add_population_utility_params(p, .i = i) + + age <- round(rnorm(100, 8760, 1000)) / 365 # Mean age 24 years + sex <- c("female", "male")[rbinom(100, 1, 0.5)+1] + + sm1 <- calc_severity_modifier(age, sex, 0.5, .i = i, .p = p) + testthat::expect_equal(as.vector(sm1), 1.7) + + sm2 <- calc_severity_modifier(age, sex, 2.5, .i = i, .p = p) + testthat::expect_equal(as.vector(sm2), 1.7) + + sm3 <- calc_severity_modifier(age, sex, 6.0, .i = i, .p = p) + testthat::expect_equal(as.vector(sm3), 1.2) + + sm4 <- calc_severity_modifier(age, sex, 11.0, .i = i, .p = p) + testthat::expect_equal(as.vector(sm4), 1.0) + +}) diff --git a/tests/testthat/test-treatment_effect_waning.R b/tests/testthat/test-treatment_effect_waning.R new file mode 100644 index 0000000..40581d8 --- /dev/null +++ b/tests/testthat/test-treatment_effect_waning.R @@ -0,0 +1,96 @@ +source(here::here("3_Functions", "survival", "treatment_effect_waning.R")) + +testthat::test_that("treatment effect wanes over time", { + + start_cycle <- 24 + finish_cycle <- 36 + + t <- seq(0, 5, length.out = 61) + + surv_active <- pweibull(t, shape = 1.2, scale = 5.0, lower.tail = FALSE) + surv_ref <- pweibull(t, shape = 1.2, scale = 2.5, lower.tail = FALSE) + + res <- treatment_effect_waning(surv_active, surv_ref, start_cycle, finish_cycle) + + # Calculated in Excel + target <- c(1,0.992678102,0.983258533,0.972909693,0.961955215,0.950566622,0.938853589,0.926893604,0.914744939,0.902453319,0.890055742,0.877582838,0.865060413,0.852510516,0.83995219,0.827402031,0.814874602,0.802382763,0.789937915,0.77755021,0.765228711,0.752981531,0.740815937,0.728738455,0.716754939,0.704233849,0.690574022,0.675834556,0.660081347,0.643386456,0.625827422,0.607486516,0.588449961,0.568807117,0.548649641,0.528070644,0.507163842,0.486498698,0.466570363,0.447359589,0.428847082,0.411013568,0.393839841,0.377306815,0.361395562,0.346087351,0.331363678,0.3172063,0.30359725,0.290518868,0.277953811,0.265885074,0.254295999,0.243170288,0.232492009,0.222245606,0.2124159,0.202988094,0.193947777,0.18528092,0.176973879 ) + + testthat::expect_equal(res, target) + +}) + +testthat::test_that("treatment effect bypass flag works", { + + start_cycle <- 24 + finish_cycle <- 36 + + t <- seq(0, 5, length.out = 61) + + surv_active <- pweibull(t, shape = 1.2, scale = 5.0, lower.tail = FALSE) + surv_ref <- pweibull(t, shape = 1.2, scale = 2.5, lower.tail = FALSE) + + res <- treatment_effect_waning(surv_active, surv_ref, start_cycle, finish_cycle, apply_waning = FALSE) + + testthat::expect_equal(res, surv_active) + +}) + +testthat::test_that("treatment effect wanes immediately", { + + start_cycle <- 24 + finish_cycle <- 24 + + t <- seq(0, 5, length.out = 61) + + surv_active <- pweibull(t, shape = 1.2, scale = 5.0, lower.tail = FALSE) + surv_ref <- pweibull(t, shape = 1.2, scale = 2.5, lower.tail = FALSE) + + res <- treatment_effect_waning(surv_active, surv_ref, start_cycle, finish_cycle) + + expected <- c( + surv_active[1:25], + surv_active[25] * surv_ref[26:61] / surv_ref[25] + ) + + testthat::expect_equal(res, expected) + +}) + +testthat::test_that("treatment effect wanes based on end of cycle", { + + start_cycle <- 24 + finish_cycle <- 36 + + t <- seq(0, 5, length.out = 61) + + surv_active <- pweibull(t, shape = 1.2, scale = 5.0, lower.tail = FALSE) + surv_ref <- pweibull(t, shape = 1.2, scale = 2.5, lower.tail = FALSE) + + res <- treatment_effect_waning(surv_active, surv_ref, start_cycle, finish_cycle, wcc = 1) + + # Calculated in Excel + target <- c(1,0.992678102,0.983258533,0.972909693,0.961955215,0.950566622,0.938853589,0.926893604,0.914744939,0.902453319,0.890055742,0.877582838,0.865060413,0.852510516,0.83995219,0.827402031,0.814874602,0.802382763,0.789937915,0.77755021,0.765228711,0.752981531,0.740815937,0.728738455,0.716754939,0.703597625,0.689321812,0.673989976,0.65767116,0.64044032,0.6223776,0.603567568,0.584098411,0.564061103,0.543548561,0.522654784,0.501474013,0.481040709,0.461335949,0.442340699,0.424035882,0.406402441,0.389421385,0.373073841,0.357341095,0.342204625,0.327646137,0.313647589,0.300191218,0.287259561,0.27483547,0.262902131,0.251443073,0.24044218,0.229883701,0.219752251,0.210032824,0.200710788,0.191771893,0.183202269,0.174988424 ) + + testthat::expect_equal(res, target) + +}) + +testthat::test_that("treatment_effect_waning produces a warning", { + + start_cycle <- 24 + finish_cycle <- 36 + + t <- seq(0, 5, length.out = 61) + + surv_active <- pweibull(t, shape = 2.5, scale = 4.0, lower.tail = FALSE) + surv_ref <- pweibull(t, shape = 1.2, scale = 2.5, lower.tail = FALSE) + + testthat::expect_warning( + res <- treatment_effect_waning(surv_active, surv_ref, start_cycle, finish_cycle, wcc = 1), + regexp = paste( + "Hazard rate in the active treatment is more than the hazard rate in the", + "reference treatment in 18 cycles, the first of which is 42" + ) + ) + +})