diff --git a/NAMESPACE b/NAMESPACE index 62aac35..a5bcceb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -30,6 +30,7 @@ S3method(print,mortaar_life_table) S3method(print,mortaar_life_table_list) export(as.mortaar_life_table) export(as.mortaar_life_table_list) +export(halley.band) export(is.mortaar_life_table) export(is.mortaar_life_table_list) export(life.table) @@ -39,6 +40,7 @@ export(lt.population_size) export(lt.representativity) export(lt.reproduction) export(lt.sexrelation) +export(pop.sim.gomp) export(prep.life.table) importFrom(Rdpack,reprompt) importFrom(graphics,axis) diff --git a/R/halley_band.R b/R/halley_band.R new file mode 100644 index 0000000..b21700b --- /dev/null +++ b/R/halley_band.R @@ -0,0 +1,102 @@ +#' Halley band of the mortality profile of a skeletal population +#' +#' In a series of papers, M. A. Luy and U. Wittwer-Backofen \emph{(2005; 2008)} +#' proposed a method they called 'Halley band' as alternative for other +#' methods of sampling from an skeletal population. It basically involves +#' sampling n times from the age-estimation of each individual and then +#' only taking the 2.5th and 97.5th percentile into account. The space +#' between they dubbed 'Halley band' but pointed out that it +#' is not to be confused with confidence intervals. +#' +#' @param x a data.frame with individuals and age estimations. +#' +#' @param n number of runs, default: 1000. +#' +#' @param uncert level of uncertainty, default: 0.95. +#' +#' @param agebeg numeric. Starting age of the respective individual. +#' +#' @param ageend numeric. Closing age of the respective individual. +#' +#' @param agerange character. Determination if the closing +#' age leaves a gap to the following age category. If yes (= "excluded"), +#' "1" is added to avoid gaps, default: "excluded". +#' +#' @return +#' One data.frame with the following items: +#' +#' \itemize{ +#' \item \bold{age}: age in years. +#' \item \bold{lower_dx}: Lower boundary of uncertainty for dx. +#' \item \bold{upper_dx}: Upper boundary of uncertainty for dx. +#' \item \bold{lower_qx}: Lower boundary of uncertainty for qx. +#' \item \bold{upper_qx}: Upper boundary of uncertainty for qx. +#' \item \bold{lower_lx}: Lower boundary of uncertainty for lx. +#' \item \bold{upper_lx}: Upper boundary of uncertainty for lx. +#' } +#' +#' @references +#' +#' \insertRef{Luy_Wittwer-Backofen_2005}{mortAAR} +#' +#' \insertRef{Luy_Wittwer-Backofen_2008}{mortAAR} +#' +#' @examples +#' +#'# create simulated population with artifical coarsening first +#' pop_sim <- pop.sim.gomp(n = 1000) +#' sim_ranges <- random.cat() +#' +#' # apply random age categories to simulated ages +#' sim_appl <- random.cat.apply(pop_sim$result, age = "age", age_ranges = sim_ranges, from = "from", to = "to") +#' +#' # create halley bands +#' demo <- halley.band(sim_appl, n = 1000, uncert = 0.95, agebeg = "from", ageend = "to", agerange = "excluded") +#' +#' # plot band with ggplot +#' library(ggplot2) +#' ggplot(demo) + geom_ribbon(aes(x = age, ymin = lower_dx, ymax = upper_dx), linetype = 0, fill = "grey") +#' ggplot(demo) + geom_ribbon(aes(x = age, ymin = lower_lx, ymax = upper_lx), linetype = 0, fill = "grey") +#' ggplot(demo) + geom_ribbon(aes(x = age, ymin = lower_qx, ymax = upper_qx), linetype = 0, fill = "grey") + +#' @rdname halley.band +#' @export +halley.band <- function(x, n = 1000, uncert = 0.95, agebeg, ageend, agerange = "excluded") { + asd <- data.frame(x) + + # Change the names of agebeg and ageend for further processes to "beg" and "ende". + names(asd)[which(names(asd)==agebeg)] <- "beg" + if (!is.na(ageend)) { + names(asd)[which(names(asd)==ageend)] <- "ende" + } else { + asd$ende <- asd$beg + } + + # Defines if the max of the age ranges is inclusive or exclusive. + if(agerange == "excluded"){ + asd$ende = asd$ende + 1 + } + + low_q <- ( 1 - uncert ) / 2 + up_q <- 1 - ( 1 - uncert ) / 2 + + demo_sim_list <- list() + for (i in 1:n) { + demo_sim_sim <- data.frame(ind = 1:nrow(asd)) %>% + dplyr::mutate(age = (round(runif(dplyr::n(), min = asd$beg, asd$ende) ) ) ) + necdf <- data.frame(a = 1, demo_sim_sim %>% dplyr::group_by(age) %>% dplyr::summarize(Dx = dplyr::n())) + + necdf['dx'] <- necdf['Dx'] / sum(necdf['Dx']) * 100 + necdf['lx'] <- c(100, 100 - cumsum(necdf[, 'dx']))[1:nrow(necdf)] + necdf['qx'] <- necdf['dx'] / necdf['lx'] * 100 + necdf['Ax'] <- necdf[, 'a'] / 2 + necdf['Lx'] <- necdf['a']* necdf['lx'] - ((necdf['a'] - necdf['Ax']) * necdf['dx']) + demo_sim_list[[i]] <- necdf + } + demo_sim_all <- dplyr::bind_rows(demo_sim_list) + output <- demo_sim_all %>% dplyr::group_by(age) %>% dplyr::summarize( + lower_dx = quantile(dx, probs = low_q), upper_dx = quantile(dx, probs = up_q) , + lower_qx = quantile(qx, probs = low_q), upper_qx = quantile(qx, probs = up_q) , + lower_lx = quantile(lx, probs = low_q), upper_lx = quantile(lx, probs = up_q)) + return(output) +} diff --git a/R/population_simulation.R b/R/population_simulation.R index e71f168..e343a61 100644 --- a/R/population_simulation.R +++ b/R/population_simulation.R @@ -1,10 +1,10 @@ -#' Simulates an adult population with Gompertz distribution +#' Simulation of a population of adults with Gompertz distribution #' #' In many instances, it is useful to calculate with a population with #' known parameters. To generate a population with realistic #' characteristics is less obvious than it seems. We operate here #' with the Gompertz distribution which provides a reasonable -#' approximation of human mortality for adult mortality, that is +#' approximation of human mortality for \bold{adult} mortality, that is #' for the ages >= 15 years. The user has to specify #' either the parameter b or the modal age M. The modal age M is #' particular useful as it provides an intuitive understanding of @@ -13,52 +13,46 @@ #' \emph{Sasaki and Kondo 2016}. If neither is given, a population #' with random parameters realistic for pre-modern times is generated. #' -#' @param x number of individuals to be simulated. +#' @param n number of individuals to be simulated. #' #' @param b numeric, optional. Gompertz parameter controlling the #' level of mortality. #' #' @param M numeric, optional. Modal age M. #' -#' @param start_age numeric, optional. Start age, default: 15 years. +#' @param start_age numeric. Start age, default: 15 years. #' #' @return #' A list of two data.frames with the following items: #' +#' First data.frame #' \itemize{ -#' \item \bold{First data.frame} #' \item \bold{N}: Number of individuals. -#' \item \bold{b}: Gompertz parameter controlling mortality. +#' \item \bold{b}: Gompertz parameter controlling mortality. #' \item \bold{M}: Modal age. -#' \item \bold{a}: Gompertz parameter controlling hazard of the +#' \item \bold{a}: Gompertz parameter controlling hazard of the #' youngest age group. #' } #' +#'Second data.frame +#' \itemize{ +#' \item \bold{ind}: ID of individuals. +#' \item \bold{age}: Simulated absolute age. +#' } +#' #' @references #' -#' \insertRef{sasaki and kondo 2016}{mortAAR} +#' \insertRef{sasaki_kondo_2016}{mortAAR} #' #' @examples #' -#' pop_lt <- pop.sim.gomp(10000, M = 35) - -#' @rdname pop.sim.gomp -#' @export -pop.sim.gomp <- function(x, b, M, start_age) { - UseMethod("pop.sim.gomp") -} - -#' @rdname pop.sim.gomp -#' @export -#' @noRd -pop.sim.gomp.default <- function(x, b, M, start_age) { - stop("x must be a numeric value.") -} +#' pop_sim <- pop.sim.gomp(n = 10000, M = 35) +#' pop_sim <- pop.sim.gomp(n = 10000, b = 0.03) +#' pop_sim <- pop.sim.gomp(n = 10000) #' @rdname pop.sim.gomp #' @export -#' @noRd -pop.sim.gomp.df <- function(x, b = NULL, M = NULL, start_age = 15) { +pop.sim.gomp <- function(n, b = NULL, M = NULL, start_age = 15) { if ( length(M) > 0) { M_1 <- 0 M_2 <- 0 @@ -71,12 +65,16 @@ pop.sim.gomp.df <- function(x, b = NULL, M = NULL, start_age = 15) { } } else if (length(b) > 0) { a <- exp(rnorm(1, (-66.77 * (b - 0.0718) - 7.119), sqrt(0.0823) ) ) + M <- 1 / b * log (b/a) + start_age } else { b <- runif(n = 1, min = 0.02, max = 0.05) a <- exp(rnorm(1, (-66.77 * (b - 0.0718) - 7.119), sqrt(0.0823) ) ) + M <- 1 / b * log (b/a) + start_age } - lt_result <- data.frame(ind = 1:x) %>% - mutate(age = round(flexsurv::rgompertz(n(), b, a) ) + start_age) - return(lt_result) + lt_result <- data.frame(ind = 1:n) %>% + dplyr::mutate(age = round(flexsurv::rgompertz(dplyr::n(), b, a) ) + start_age) + lt_values <- data.frame(n = n, b = b, a = a, M = M, start_age = start_age) + lt_list <- list(values = lt_values, result = lt_result) + return(lt_list) } diff --git a/R/random_categories.R b/R/random_categories.R new file mode 100644 index 0000000..556db5c --- /dev/null +++ b/R/random_categories.R @@ -0,0 +1,98 @@ +#' Generation of random age ranges +#' +#' Helper function that generates random age categories of absolute ages. +#' It is mainly used together with the functions \code{pop.sim.gomp} +#' and \code{random.cat.apply}. +#' +#' @param n_cat numeric. Number of categories, default: 20. +#' +#' @param min_age numeric. Minimum age, default: 15. +#' +#' @param max_age numeric. Maximum age, default: 75. +#' +#' @param max_cat_low numeric. Lower boundary of highest age categoriy, default: 60. +#' +#' @return +#' One data.frame with the following items: +#' +#' \itemize{ +#' \item \bold{from}: Lower boundary of age category. +#' \item \bold{to}: Upper boundary of age category. +#' } +#' +#' @examples +#' sim_ranges <- random.cat() + +# generate random age ranges with 5 year ranges +random.cat <- function(n_cat = 20, min_age = 15, max_cat_low = 60, max_age = 75) { + n_sim_ranges <- 0 + sim_ranges <- data.frame() + while (n_sim_ranges < n_cat){ + range_from <- round(runif(1, min = min_age, max = max_age)/5) * 5 + if(range_from > max_cat_low) {range_from <- max_cat_low} + + #define probabilities + beta_1 <- range_from/40 + if(beta_1 == 0){beta_1 <- 0.01} + beta_2 <- 3 - beta_1 + range_probs <- dbeta(seq(1, 8, 1)/8.1, beta_1, beta_2) + + range_to <- range_from + sample(seq(1, 8, 1), 1 , prob = range_probs) *5 -1 + if(range_to > max_cat_low) {range_to <- max_age} + sim_ranges <- rbind(sim_ranges, data.frame(from = range_from, to = range_to)) + sim_ranges <- sim_ranges %>% dplyr::distinct() + n_sim_ranges <- nrow(sim_ranges) + } + return(sim_ranges) +} + + + +#' Applying random age ranges to individuals with absolute ages +#' +#' Helper function that applies random age categories to "known" absolute ages. +#' It is mainly used together with the functions \code{pop.sim.gomp} +#' and \code{random.cat}. +#' +#' @param x a data.frame with individual absolute ages. +#' +#' @param age_ranges a data.frame with age ranges. +#' +#' @param from numeric. Column name for the begin of an age range. +#' +#' @param to numeric. Column name for the end of an age range. +#' +#' @return +#' The original data.frame \code{x} with two additional columns: +#' +#' \itemize{ +#' \item \bold{from}: Lower boundary of age category. +#' \item \bold{to}: Upper boundary of age category. +#' } +#' +#' @examples +#' +#' # Simulate population and age ranges first +#' pop_sim <- pop.sim.gomp(n = 10000) +#' sim_ranges <- random.cat() +#' +#' # apply random age categories to simulated ages +#' sim_appl <- random.cat.apply(pop_sim$result, age = "age", age_ranges = sim_ranges, from = "from", to = "to") + +random.cat.apply <- function(x, age, age_ranges, from, to) { + asd <- data.frame(x) + max_age <- max(age_ranges['to']) + a_r <- data.frame(from = age_ranges['from'], to = age_ranges['to']) + + for (j in 1:nrow(asd)){ + this_age <- asd[j,'age'] + if(this_age > max_age) + { this_age <- max_age} + possible_age_cat <- subset(a_r, from <= this_age & to >= this_age) + selected_age_index <- sample.int(nrow(possible_age_cat), 1) + selected_age_cat <- possible_age_cat[selected_age_index,] + asd$from[j] <- selected_age_cat$from + asd$to[j] <- selected_age_cat$to + } + return(asd) +} diff --git a/inst/REFERENCES.bib b/inst/REFERENCES.bib index cbae8d1..eb127c5 100644 --- a/inst/REFERENCES.bib +++ b/inst/REFERENCES.bib @@ -282,6 +282,30 @@ @article{kokkotidis_graberfeld-_1991 year = {1991} } +@Article{Luy_Wittwer-Backofen_2005, +author = {Luy, Marc A. and Wittwer-Backofen, Ursula}, +title = {{Das Halley-Band für paläodemographische Mortalitätsanalysen}}, +journal = {{Zeitschrift für Bevölkerungswissenschaft}}, +volume = {30}, +number = {2–3}, +doi = {}, +pages = {219–244}, +year = {2005} +} + +@Incollection{Luy_Wittwer-Backofen_2008, +author = {Luy, Marc A. and Wittwer-Backofen, Ursula}, +title = {{The Halley band for paleodemographic mortality analysis}}, +booktitle = {{Recent advances in paleodemography. Data, techniques, patterns}}, +series = {Cambridge Stud. Biol. and Evolutionary Anthr.}, +volume = {31}, +editor = {Bocquet-Appel, J.-P.}, +publisher = {Springer}, +address = {Dordrecht}, +pages = {119-141}, +year = {2008} +} + @book{martin_lehrbuch_1928, location = {Jena}, edition = {2.}, @@ -321,8 +345,6 @@ @article{mcfadden_oxenham_2018b year = {2018} } - - @Article{mcfadden_oxenham_2019, author = {McFadden, Clare and Oxenham, Marc F.}, title = {{The Paleodemographic Measure of Maternal Mortality and a Multifaceted Approach to Maternal Health}}, @@ -345,8 +367,6 @@ @Article{mcfadden_et_al_2020 abstract = {} } - - @article{moghaddam_et_al_muensingen_2016, title = {{Social stratigraphy in Late Iron Age Switzerland: stable carbon, nitrogen and sulphur isotope analysis of human remains from Münsingen}}, @@ -408,6 +428,19 @@ @article{rinne_odagsen_2002 url = {http://www.jna.uni-kiel.de/index.php/jna/article/view/52} } +@Article{sasaki_kondo_2016, +author = {Sasaki, Tomohiko and Kondo, Osamu}, +title = {{An informative prior probability distribution of the gompertz parameters for bayesian approaches in paleodemography}}, +journal = {{American Journal of Physical Anthropology}}, +volume = {159}, +number = {3}, +doi = {10.1002/ajpa.22891}, +pages = {523–533}, +year = {2016}, +} + + + @Incollection{scheeres_et_al_investigations_in_press, author = {Scheeres, M. and Knipper, C. and Schoenfelder, M. and Hauschild, M. and Siebel, W. and Alt, K.W.}, title = {{Bioarchaeometric investigations (87Sr/86Sr and d18O) of the La Tène burial community of Münsingen-Rain, Switzerland}}, diff --git a/man/halley.band.Rd b/man/halley.band.Rd new file mode 100644 index 0000000..345f227 --- /dev/null +++ b/man/halley.band.Rd @@ -0,0 +1,68 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/halley_band.R +\name{halley.band} +\alias{halley.band} +\title{Halley band of the mortality profile of a skeletal population} +\usage{ +halley.band(x, n = 1000, uncert = 0.95, agebeg, ageend, agerange = "excluded") +} +\arguments{ +\item{x}{a data.frame with individuals and age estimations.} + +\item{n}{number of runs, default: 1000.} + +\item{uncert}{level of uncertainty, default: 0.95.} + +\item{agebeg}{numeric. Starting age of the respective individual.} + +\item{ageend}{numeric. Closing age of the respective individual.} + +\item{agerange}{character. Determination if the closing +age leaves a gap to the following age category. If yes (= "excluded"), +"1" is added to avoid gaps, default: "excluded".} +} +\value{ +One data.frame with the following items: + +\itemize{ + \item \bold{age}: age in years. + \item \bold{lower_dx}: Lower boundary of uncertainty for dx. + \item \bold{upper_dx}: Upper boundary of uncertainty for dx. + \item \bold{lower_qx}: Lower boundary of uncertainty for qx. + \item \bold{upper_qx}: Upper boundary of uncertainty for qx. + \item \bold{lower_lx}: Lower boundary of uncertainty for lx. + \item \bold{upper_lx}: Upper boundary of uncertainty for lx. + } +} +\description{ +In a series of papers, M. A. Luy and U. Wittwer-Backofen \emph{(2005; 2008)} +proposed a method they called 'Halley band' as alternative for other +methods of sampling from an skeletal population. It basically involves +sampling n times from the age-estimation of each individual and then +only taking the 2.5th and 97.5th percentile into account. The space +between they dubbed 'Halley band' but pointed out that it +is not to be confused with confidence intervals. +} +\examples{ + +# create simulated population with artifical coarsening first +pop_sim <- pop.sim.gomp(n = 1000) +sim_ranges <- random.cat() + +# apply random age categories to simulated ages +sim_appl <- random.cat.apply(pop_sim$result, age = "age", age_ranges = sim_ranges, from = "from", to = "to") + +# create halley bands +demo <- halley.band(sim_appl, n = 1000, uncert = 0.95, agebeg = "from", ageend = "to", agerange = "excluded") + +# plot band with ggplot +library(ggplot2) +ggplot(demo) + geom_ribbon(aes(x = age, ymin = lower_dx, ymax = upper_dx), linetype = 0, fill = "grey") +ggplot(demo) + geom_ribbon(aes(x = age, ymin = lower_lx, ymax = upper_lx), linetype = 0, fill = "grey") +ggplot(demo) + geom_ribbon(aes(x = age, ymin = lower_qx, ymax = upper_qx), linetype = 0, fill = "grey") +} +\references{ +\insertRef{Luy_Wittwer-Backofen_2005}{mortAAR} + +\insertRef{Luy_Wittwer-Backofen_2008}{mortAAR} +} diff --git a/man/pop.sim.gomp.Rd b/man/pop.sim.gomp.Rd new file mode 100644 index 0000000..eea9799 --- /dev/null +++ b/man/pop.sim.gomp.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/population_simulation.R +\name{pop.sim.gomp} +\alias{pop.sim.gomp} +\title{Simulation of a population of adults with Gompertz distribution} +\usage{ +pop.sim.gomp(n, b = NULL, M = NULL, start_age = 15) +} +\arguments{ +\item{n}{number of individuals to be simulated.} + +\item{b}{numeric, optional. Gompertz parameter controlling the +level of mortality.} + +\item{M}{numeric, optional. Modal age M.} + +\item{start_age}{numeric. Start age, default: 15 years.} +} +\value{ +A list of two data.frames with the following items: + +First data.frame +\itemize{ + \item \bold{N}: Number of individuals. + \item \bold{b}: Gompertz parameter controlling mortality. + \item \bold{M}: Modal age. + \item \bold{a}: Gompertz parameter controlling hazard of the + youngest age group. + } + +Second data.frame +\itemize{ + \item \bold{ind}: ID of individuals. + \item \bold{age}: Simulated absolute age. + } +} +\description{ +In many instances, it is useful to calculate with a population with +known parameters. To generate a population with realistic +characteristics is less obvious than it seems. We operate here +with the Gompertz distribution which provides a reasonable +approximation of human mortality for \bold{adult} mortality, that is +for the ages >= 15 years. The user has to specify +either the parameter b or the modal age M. The modal age M is +particular useful as it provides an intuitive understanding of +the resulting age distribution. In both instances, the second +parameter a is generated by the regression formula found by +\emph{Sasaki and Kondo 2016}. If neither is given, a population +with random parameters realistic for pre-modern times is generated. +} +\examples{ + +pop_sim <- pop.sim.gomp(n = 10000, M = 35) +pop_sim <- pop.sim.gomp(n = 10000, b = 0.03) +pop_sim <- pop.sim.gomp(n = 10000) +} +\references{ +\insertRef{sasaki_kondo_2016}{mortAAR} +} diff --git a/man/random.cat.Rd b/man/random.cat.Rd new file mode 100644 index 0000000..d8a69ab --- /dev/null +++ b/man/random.cat.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/random_categories.R +\name{random.cat} +\alias{random.cat} +\title{Generation of random age ranges} +\usage{ +random.cat(n_cat = 20, min_age = 15, max_cat_low = 60, max_age = 75) +} +\arguments{ +\item{n_cat}{numeric. Number of categories, default: 20.} + +\item{min_age}{numeric. Minimum age, default: 15.} + +\item{max_cat_low}{numeric. Lower boundary of highest age categoriy, default: 60.} + +\item{max_age}{numeric. Maximum age, default: 75.} +} +\value{ +One data.frame with the following items: + +\itemize{ + \item \bold{from}: Lower boundary of age category. + \item \bold{to}: Upper boundary of age category. + } +} +\description{ +Helper function that generates random age categories of absolute ages. +It is mainly used together with the functions \code{pop.sim.gomp} +and \code{random.cat.apply}. +} +\examples{ +sim_ranges <- random.cat() +} diff --git a/man/random.cat.apply.Rd b/man/random.cat.apply.Rd new file mode 100644 index 0000000..ad15cf4 --- /dev/null +++ b/man/random.cat.apply.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/random_categories.R +\name{random.cat.apply} +\alias{random.cat.apply} +\title{Applying random age ranges to individuals with absolute ages} +\usage{ +random.cat.apply(x, age, age_ranges, from, to) +} +\arguments{ +\item{x}{a data.frame with individual absolute ages.} + +\item{age_ranges}{a data.frame with age ranges.} + +\item{from}{numeric. Column name for the begin of an age range.} + +\item{to}{numeric. Column name for the end of an age range.} +} +\value{ +The original data.frame \code{x} with two additional columns: + +\itemize{ + \item \bold{from}: Lower boundary of age category. + \item \bold{to}: Upper boundary of age category. + } +} +\description{ +Helper function that applies random age categories to "known" absolute ages. +It is mainly used together with the functions \code{pop.sim.gomp} +and \code{random.cat}. +} +\examples{ + +# Simulate population and age ranges first +pop_sim <- pop.sim.gomp(n = 10000) +sim_ranges <- random.cat() + +# apply random age categories to simulated ages +sim_appl <- random.cat.apply(pop_sim$result, age = "age", age_ranges = sim_ranges, from = "from", to = "to") +}