From cf742de73474f66b159c9f89ce7f9e7e15034649 Mon Sep 17 00:00:00 2001 From: Andreas Bender Date: Sat, 8 Feb 2020 19:10:33 +0100 Subject: [PATCH] [WIP] issue #139 --- NAMESPACE | 1 + R/pammfit.R | 4 +- R/predict.R | 49 +++++++++++++++++++++++++ man/pamm.Rd | 3 +- man/predict.pamm.Rd | 33 +++++++++++++++++ man/rpexp.Rd | 3 +- tests/testthat/test-predict-functions.R | 23 ++++++++++-- 7 files changed, 109 insertions(+), 7 deletions(-) create mode 100644 man/predict.pamm.Rd diff --git a/NAMESPACE b/NAMESPACE index ecceb75b..8ca8bb19 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -41,6 +41,7 @@ S3method(mutate,ped) S3method(nest_tdc,default) S3method(nest_tdc,list) S3method(plot,pamm) +S3method(predict,pamm) S3method(predictSurvProb,pamm) S3method(print,pamm) S3method(rename,nested_fdf) diff --git a/R/pammfit.R b/R/pammfit.R index d1f8de9e..39a42ff9 100644 --- a/R/pammfit.R +++ b/R/pammfit.R @@ -45,7 +45,9 @@ append_ped_attr <- function(pamm, ped) { #' pamm( #' ped_status ~ s(tend) + complications, #' data = tumor, -#' trafo_args = list(formula = Surv(days, status)~complications)) +#' trafo_args = list( +#' formula = Surv(days, status)~complications), +#' cut = seq(0, 3000, by = 50)) #' @export pamm <- function( formula, diff --git a/R/predict.R b/R/predict.R index f4257b9f..69403b8d 100644 --- a/R/predict.R +++ b/R/predict.R @@ -1,3 +1,52 @@ +#' Predict hazard, cumulative hazard or survival probability +#' +#' @param object An object of class \code{pam_xgb} +#' @param newdata A data set containing the same covariates as used for model +#' fitting. If of class \code{data.frame}, the function will transform the +#' data to the PED format using the same settings as for the data used in +#' estimation. +#' @param type The type of prediction desired. Either hazard (\code{type = "hazard"}), +#' cumulative hazard (\code{type = "cumu_hazard"}) or survival probability +#' (\code{type = "surv_prob"}). +#' @param ... Currently not used. +#' @importFrom stats predict +#' @return A matrix of predictions containing one row per +#' observation (row in newdata) and 1 column per specified time in the +#' \code{times} argument. +#' @seealso pamm +#' @export +predict.pamm <- function( + object, + newdata, + type = c("hazard", "cumu_hazard", "surv_prob"), + ...) { + + type <- match.arg(type) + + if (!is.ped(newdata)) { + newdata <- as_ped(object, newdata) + } + + newdata[["pred"]] <- predict(unpam(object), newdata, type = "response") + if (type == "cumu_hazard") { + newdata <- newdata %>% + group_by(.data$id) %>% + mutate(pred = cumsum(.data$pred * exp(.data$offset)))#TODO: is it correct to use offset here? + } + if (type == "surv_prob") { + newdata <- newdata %>% + group_by(.data[["id"]]) %>% + mutate(pred = exp(-cumsum(.data$pred * exp(.data$offset)))) + } + + newdata %>% + group_by(.data[["id"]]) %>% + filter(row_number() == n()) %>% + pull(.data[["pred"]]) # TODO: is the hazard/surv prob in the last available interval a useful return? + +} + + #' S3 method for pamm objects for compatibility with package pec #' #' @inheritParams pec::predictSurvProb diff --git a/man/pamm.Rd b/man/pamm.Rd index 036b11a5..80d40dc0 100644 --- a/man/pamm.Rd +++ b/man/pamm.Rd @@ -71,7 +71,8 @@ summary(pam) pamm( ped_status ~ s(tend) + complications, data = tumor, -trafo_args = list(formula = Surv(days, status)~complications)) + trafo_args = list(formula = Surv(days, status)~complications), + cut = seq(0, 3000, by = 50)) } \seealso{ \code{\link[mgcv]{gam}} diff --git a/man/predict.pamm.Rd b/man/predict.pamm.Rd new file mode 100644 index 00000000..b64d0b2f --- /dev/null +++ b/man/predict.pamm.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/predict.R +\name{predict.pamm} +\alias{predict.pamm} +\title{Predict hazard, cumulative hazard or survival probability} +\usage{ +\method{predict}{pamm}(object, newdata, type = c("hazard", "cumu_hazard", "surv_prob"), ...) +} +\arguments{ +\item{object}{An object of class \code{pam_xgb}} + +\item{newdata}{A data set containing the same covariates as used for model +fitting. If of class \code{data.frame}, the function will transform the +data to the PED format using the same settings as for the data used in +estimation.} + +\item{type}{The type of prediction desired. Either hazard (\code{type = "hazard"}), +cumulative hazard (\code{type = "cumu_hazard"}) or survival probability +(\code{type = "surv_prob"}).} + +\item{...}{Currently not used.} +} +\value{ +A matrix of predictions containing one row per +observation (row in newdata) and 1 column per specified time in the +\code{times} argument. +} +\description{ +Predict hazard, cumulative hazard or survival probability +} +\seealso{ +pamm +} diff --git a/man/rpexp.Rd b/man/rpexp.Rd index 633c8834..a9b44df1 100644 --- a/man/rpexp.Rd +++ b/man/rpexp.Rd @@ -17,7 +17,8 @@ rpexp(n = 1, rate = 1, t = 0) and \code{t} should be in increasing order.} } \description{ -This is a copy of the same function from \code{\link[msm]{rpexp}}. +This is a copy of the same function from \code{rpexp} from package +\pkg{msm}. Copied here to reduce dependencies. } \keyword{internal} diff --git a/tests/testthat/test-predict-functions.R b/tests/testthat/test-predict-functions.R index c23c5b32..66b9854f 100644 --- a/tests/testthat/test-predict-functions.R +++ b/tests/testthat/test-predict-functions.R @@ -9,8 +9,23 @@ context("Predict functions") pam2 <- pamm(ped_status ~ s(tend, k = 3) + complications, data = ped, engine = "bam", method = "fREML", discrete = TRUE) + ## predict S3 generic + predh <- predict(pam, newdata = tumor[21:23, ], type = "hazard") + predch <- predict(pam, newdata = tumor[21:23, ], type = "cumu_hazard") + predsp <- predict(pam, newdata = tumor[21:23, ], type = "surv_prob") + expect_identical(round(predh * 100, 2), c(.19, .02, .02)) + expect_identical(round(predch, 2), c(.2, .96, .96)) + expect_identical(round(predsp, 2), c(.81, .38, .38)) + + predh2 <- predict(pam2, newdata = tumor[21:23, ], type = "hazard") + predch2 <- predict(pam2, newdata = tumor[21:23, ], type = "cumu_hazard") + predsp2 <- predict(pam2, newdata = tumor[21:23, ], type = "surv_prob") + expect_identical(round(predh2 * 100, 2), c(.19, .02, .02)) + expect_identical(round(predch2, 2), c(.2, .96, .96)) + expect_identical(round(predsp2, 2), c(.81, .38, .38)) + ## predictSurvProb (pec) generic - spmat <- predictSurvProb.pamm(pam, tumor[21:23,], times = c(90, 500, 1217)) + spmat <- predictSurvProb(pam, tumor[21:23,], times = c(90, 500, 1217)) expect_identical( round(spmat, 2), matrix( @@ -23,9 +38,9 @@ context("Predict functions") ncol = 3 ) ) - - expect_error(predictSurvProb.pamm(pam, tumor[21:23,], times = c(90, 500, 2000))) - spmat2 <- predictSurvProb.pamm(pam2, tumor[21:23,], times = c(90, 500, 1217)) + expect_identical(spmat[1,1], predsp[1]) + expect_error(predictSurvProb(pam, tumor[21:23,], times = c(90, 500, 2000))) + spmat2 <- predictSurvProb(pam2, tumor[21:23,], times = c(90, 500, 1217)) expect_identical(round(spmat, 2), round(spmat2, 2)) }