From cf742de73474f66b159c9f89ce7f9e7e15034649 Mon Sep 17 00:00:00 2001
From: Andreas Bender <bender.at.R@gmail.com>
Date: Sat, 8 Feb 2020 19:10:33 +0100
Subject: [PATCH] [WIP] issue #139

---
 NAMESPACE                               |  1 +
 R/pammfit.R                             |  4 +-
 R/predict.R                             | 49 +++++++++++++++++++++++++
 man/pamm.Rd                             |  3 +-
 man/predict.pamm.Rd                     | 33 +++++++++++++++++
 man/rpexp.Rd                            |  3 +-
 tests/testthat/test-predict-functions.R | 23 ++++++++++--
 7 files changed, 109 insertions(+), 7 deletions(-)
 create mode 100644 man/predict.pamm.Rd

diff --git a/NAMESPACE b/NAMESPACE
index ecceb75b..8ca8bb19 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -41,6 +41,7 @@ S3method(mutate,ped)
 S3method(nest_tdc,default)
 S3method(nest_tdc,list)
 S3method(plot,pamm)
+S3method(predict,pamm)
 S3method(predictSurvProb,pamm)
 S3method(print,pamm)
 S3method(rename,nested_fdf)
diff --git a/R/pammfit.R b/R/pammfit.R
index d1f8de9e..39a42ff9 100644
--- a/R/pammfit.R
+++ b/R/pammfit.R
@@ -45,7 +45,9 @@ append_ped_attr <- function(pamm, ped) {
 #' pamm(
 #'  ped_status ~ s(tend) + complications,
 #'  data = tumor,
-#' trafo_args = list(formula = Surv(days, status)~complications))
+#'  trafo_args = list(
+#'    formula = Surv(days, status)~complications),
+#'    cut = seq(0, 3000, by = 50))
 #' @export
 pamm <- function(
   formula,
diff --git a/R/predict.R b/R/predict.R
index f4257b9f..69403b8d 100644
--- a/R/predict.R
+++ b/R/predict.R
@@ -1,3 +1,52 @@
+#' Predict hazard, cumulative hazard or survival probability
+#'
+#' @param object An object of class \code{pam_xgb}
+#' @param newdata A data set containing the same covariates as used for model
+#' fitting. If of class \code{data.frame}, the function will transform the
+#' data to the PED format using the same settings as for the data used in
+#' estimation.
+#' @param type The type of prediction desired. Either hazard (\code{type = "hazard"}),
+#' cumulative hazard (\code{type = "cumu_hazard"}) or survival probability
+#' (\code{type = "surv_prob"}).
+#' @param ... Currently not used.
+#' @importFrom stats predict
+#' @return A matrix of predictions containing one row per
+#' observation (row in newdata) and 1 column per  specified time in the
+#' \code{times} argument.
+#' @seealso pamm
+#' @export
+predict.pamm <- function(
+  object,
+  newdata,
+  type = c("hazard", "cumu_hazard", "surv_prob"),
+  ...) {
+
+  type <- match.arg(type)
+
+  if (!is.ped(newdata)) {
+    newdata <- as_ped(object, newdata)
+  }
+
+  newdata[["pred"]] <- predict(unpam(object), newdata, type = "response")
+  if (type == "cumu_hazard") {
+    newdata <- newdata %>%
+      group_by(.data$id) %>%
+      mutate(pred = cumsum(.data$pred * exp(.data$offset)))#TODO: is it correct to use offset here?
+  }
+  if (type == "surv_prob") {
+     newdata <- newdata %>%
+      group_by(.data[["id"]]) %>%
+      mutate(pred = exp(-cumsum(.data$pred * exp(.data$offset))))
+  }
+
+  newdata %>%
+    group_by(.data[["id"]]) %>%
+    filter(row_number() == n()) %>%
+    pull(.data[["pred"]]) # TODO: is the hazard/surv prob in the last available interval a useful return?
+
+}
+
+
 #' S3 method for pamm objects for compatibility with package pec
 #'
 #' @inheritParams pec::predictSurvProb
diff --git a/man/pamm.Rd b/man/pamm.Rd
index 036b11a5..80d40dc0 100644
--- a/man/pamm.Rd
+++ b/man/pamm.Rd
@@ -71,7 +71,8 @@ summary(pam)
 pamm(
  ped_status ~ s(tend) + complications,
  data = tumor,
-trafo_args = list(formula = Surv(days, status)~complications))
+ trafo_args = list(formula = Surv(days, status)~complications),
+ cut = seq(0, 3000, by = 50))
 }
 \seealso{
 \code{\link[mgcv]{gam}}
diff --git a/man/predict.pamm.Rd b/man/predict.pamm.Rd
new file mode 100644
index 00000000..b64d0b2f
--- /dev/null
+++ b/man/predict.pamm.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/predict.R
+\name{predict.pamm}
+\alias{predict.pamm}
+\title{Predict hazard, cumulative hazard or survival probability}
+\usage{
+\method{predict}{pamm}(object, newdata, type = c("hazard", "cumu_hazard", "surv_prob"), ...)
+}
+\arguments{
+\item{object}{An object of class \code{pam_xgb}}
+
+\item{newdata}{A data set containing the same covariates as used for model
+fitting. If of class \code{data.frame}, the function will transform the
+data to the PED format using the same settings as for the data used in
+estimation.}
+
+\item{type}{The type of prediction desired. Either hazard (\code{type = "hazard"}),
+cumulative hazard (\code{type = "cumu_hazard"}) or survival probability
+(\code{type = "surv_prob"}).}
+
+\item{...}{Currently not used.}
+}
+\value{
+A matrix of predictions containing one row per
+observation (row in newdata) and 1 column per  specified time in the
+\code{times} argument.
+}
+\description{
+Predict hazard, cumulative hazard or survival probability
+}
+\seealso{
+pamm
+}
diff --git a/man/rpexp.Rd b/man/rpexp.Rd
index 633c8834..a9b44df1 100644
--- a/man/rpexp.Rd
+++ b/man/rpexp.Rd
@@ -17,7 +17,8 @@ rpexp(n = 1, rate = 1, t = 0)
     and \code{t} should be in increasing order.}
 }
 \description{
-This is a copy of the same function from \code{\link[msm]{rpexp}}.
+This is a copy of the same function from \code{rpexp} from package
+\pkg{msm}.
 Copied here to reduce dependencies.
 }
 \keyword{internal}
diff --git a/tests/testthat/test-predict-functions.R b/tests/testthat/test-predict-functions.R
index c23c5b32..66b9854f 100644
--- a/tests/testthat/test-predict-functions.R
+++ b/tests/testthat/test-predict-functions.R
@@ -9,8 +9,23 @@ context("Predict functions")
   pam2 <- pamm(ped_status ~ s(tend, k = 3) + complications, data = ped,
     engine = "bam", method = "fREML", discrete = TRUE)
 
+  ## predict S3 generic
+  predh <- predict(pam, newdata = tumor[21:23, ], type = "hazard")
+  predch <- predict(pam, newdata = tumor[21:23, ], type = "cumu_hazard")
+  predsp <- predict(pam, newdata = tumor[21:23, ], type = "surv_prob")
+  expect_identical(round(predh * 100, 2), c(.19, .02, .02))
+  expect_identical(round(predch, 2), c(.2, .96, .96))
+  expect_identical(round(predsp, 2), c(.81, .38, .38))
+
+  predh2 <- predict(pam2, newdata = tumor[21:23, ], type = "hazard")
+  predch2 <- predict(pam2, newdata = tumor[21:23, ], type = "cumu_hazard")
+  predsp2 <- predict(pam2, newdata = tumor[21:23, ], type = "surv_prob")
+  expect_identical(round(predh2 * 100, 2), c(.19, .02, .02))
+  expect_identical(round(predch2, 2), c(.2, .96, .96))
+  expect_identical(round(predsp2, 2), c(.81, .38, .38))
+
   ## predictSurvProb (pec) generic
-  spmat <- predictSurvProb.pamm(pam, tumor[21:23,], times = c(90, 500, 1217))
+  spmat <- predictSurvProb(pam, tumor[21:23,], times = c(90, 500, 1217))
   expect_identical(
     round(spmat, 2),
     matrix(
@@ -23,9 +38,9 @@ context("Predict functions")
       ncol = 3
     )
   )
-
-  expect_error(predictSurvProb.pamm(pam, tumor[21:23,], times = c(90, 500, 2000)))
-  spmat2 <- predictSurvProb.pamm(pam2, tumor[21:23,], times = c(90, 500, 1217))
+  expect_identical(spmat[1,1], predsp[1])
+  expect_error(predictSurvProb(pam, tumor[21:23,], times = c(90, 500, 2000)))
+  spmat2 <- predictSurvProb(pam2, tumor[21:23,], times = c(90, 500, 1217))
   expect_identical(round(spmat, 2), round(spmat2, 2))
 
   }