diff --git a/OncoSimulR/DESCRIPTION b/OncoSimulR/DESCRIPTION index d0de8249..df7b4b08 100644 --- a/OncoSimulR/DESCRIPTION +++ b/OncoSimulR/DESCRIPTION @@ -29,7 +29,7 @@ License: GPL (>= 3) URL: https://github.com/rdiaz02/OncoSimul, https://popmodels.cancercontrol.cancer.gov/gsr/packages/oncosimulr/ BugReports: https://github.com/rdiaz02/OncoSimul/issues Depends: R (>= 3.3.0) -Imports: Rcpp (>= 0.12.4), parallel, data.table, graph, Rgraphviz, gtools, igraph, methods, RColorBrewer, grDevices, car, dplyr, smatr, ggplot2, ggrepel, nem +Imports: Rcpp (>= 0.12.4), parallel, data.table, graph, Rgraphviz, gtools, igraph, methods, RColorBrewer, grDevices, car, dplyr, smatr, ggplot2, ggrepel, nem, ggmuller Suggests: BiocStyle, knitr, Oncotree, testthat (>= 1.0.0), rmarkdown, bookdown, pander LinkingTo: Rcpp VignetteBuilder: knitr diff --git a/OncoSimulR/NAMESPACE b/OncoSimulR/NAMESPACE index 7a21baaf..8820f00b 100644 --- a/OncoSimulR/NAMESPACE +++ b/OncoSimulR/NAMESPACE @@ -64,6 +64,7 @@ importFrom("dplyr", "full_join", "left_join", "right_join", "%>%", "mutate", importFrom("smatr", "ma") ## for major axis regression in some tests importFrom("car", "linearHypothesis") importFrom("nem", "transitive.reduction") +importFrom("ggmuller", "get_Muller_df", "Muller_plot", "Muller_pop_plot") ## importFrom("slam", "simple_triplet_zero_matrix", ## "colapply_simple_triplet_matrix", ## "col_sums") diff --git a/OncoSimulR/R/OncoSimulR.R b/OncoSimulR/R/OncoSimulR.R index b7593c79..4ba78022 100644 --- a/OncoSimulR/R/OncoSimulR.R +++ b/OncoSimulR/R/OncoSimulR.R @@ -30,7 +30,7 @@ oncoSimulSample <- function(Nindiv, if(length(fp$drv)) { nd <- (2: round(0.75 * length(fp$drv))) } else { - nd <- 9e6 + nd <- 9e6 } } else { nd <- (2 : round(0.75 * max(fp))) @@ -70,14 +70,14 @@ oncoSimulSample <- function(Nindiv, seed = "auto"){ ## No longer using mclapply, because of the way we use the limit on ## the number of tries. - + ## leaving detectionSize and detectionDrivers as they are, produces ## the equivalente of uniform sampling. For last, fix a single number ## detectionDrivers when there are none: had we left it at 0, then ## when there are no drivers we would stop at the first sampling ## period. - + if(Nindiv < 1) stop("Nindiv must be >= 1") if(keepPhylog) @@ -112,8 +112,8 @@ oncoSimulSample <- function(Nindiv, HittedMaxTries = TRUE, HittedWallTime = FALSE, UnrecoverExcept = FALSE - )) - } + )) + } f.out.time <- function() { message("Run out of time") @@ -123,8 +123,8 @@ oncoSimulSample <- function(Nindiv, HittedMaxTries = FALSE, HittedWallTime = TRUE, UnrecoverExcept = FALSE - )) - } + )) + } f.out.attempts.cpp <- function() { message("Run out of attempts (in C++)") @@ -134,8 +134,8 @@ oncoSimulSample <- function(Nindiv, HittedMaxTries = TRUE, HittedWallTime = FALSE, UnrecoverExcept = FALSE - )) - } + )) + } f.out.time.cpp <- function() { message("Run out of time (in C++)") @@ -145,8 +145,8 @@ oncoSimulSample <- function(Nindiv, HittedMaxTries = FALSE, HittedWallTime = TRUE, UnrecoverExcept = FALSE - )) - } + )) + } f.out.unrecover.except <- function(x) { message("Unrecoverable exception (in C++)") @@ -157,13 +157,13 @@ oncoSimulSample <- function(Nindiv, HittedWallTime = NA, UnrecoverExcept = TRUE, ExceptionMessage = x$other$ExceptionMessage - )) - } - - + )) + } + + startTime <- Sys.time() while(TRUE) { - + possibleAttempts <- attemptsLeft - (numToRun - 1) ## I think I do not want a try here. tmp <- oncoSimulIndiv(fp = fp, @@ -195,11 +195,11 @@ oncoSimulSample <- function(Nindiv, mutationPropGrowth = mutationPropGrowth, detectionProb = detectionProb, AND_DrvProbExit = AND_DrvProbExit, - fixation = fixation) + fixation = fixation) if(tmp$other$UnrecoverExcept) { return(f.out.unrecover.except(tmp)) } - + pop[[indiv]] <- tmp numToRun <- (numToRun - 1) attemptsUsed <- attemptsUsed + tmp$other$attemptsUsed @@ -213,17 +213,17 @@ oncoSimulSample <- function(Nindiv, ) } indiv <- indiv + 1 - + ## We need to check in exactly this order. Attempts left only ## matters if no remaining individuals to run. But C++ might bail ## out in exactly the last individual - if( + if( (exists("HittedMaxTries", where = tmp) && tmp[["HittedMaxTries"]]) ) { ## in C++ code return(f.out.attempts.cpp()) - } else if( + } else if( (exists("HittedWallTime", where = tmp) && tmp[["HittedWallTime"]]) ) { ## in C++ code @@ -250,12 +250,12 @@ oncoSimulSample <- function(Nindiv, UnrecoverExcept = FALSE )) } else if( attemptsLeft <= 0 ) { - ## it is very unlikely this will ever happen. + ## it is very unlikely this will ever happen. return(f.out.attempts()) } else if( as.double(difftime(Sys.time(), startTime, units = "secs")) > max.wall.time.total ) { return(f.out.time()) - } + } } } @@ -281,7 +281,7 @@ samplePop <- function(x, timeSample = "last", propError = 0) { ## timeSample <- match.arg(timeSample) gN <- geneNames - + if(!is.null(popSizeSample) && (length(popSizeSample) > 1) && (length(popSizeSample) != length(x))) { message("length popSizeSample != number of subjects") @@ -365,7 +365,7 @@ oncoSimulPop <- function(Nindiv, s = 0.1, sh = -1, K = initSize/(exp(1) - 1), - keepEvery = sampleEvery, + keepEvery = sampleEvery, minDetectDrvCloneSz = "auto", extraTime = 0, ## used to be this @@ -390,7 +390,7 @@ oncoSimulPop <- function(Nindiv, if(Nindiv < 1) stop("Nindiv must be >= 1") - + if(.Platform$OS.type == "windows") { if(mc.cores != 1) message("You are running Windows. Setting mc.cores = 1") @@ -432,7 +432,7 @@ oncoSimulPop <- function(Nindiv, ## mc.allow.recursive = FALSE ## FIXME: remove? ## done for covr issue ## https://github.com/r-lib/covr/issues/335#issuecomment-424116766 - + class(pop) <- "oncosimulpop" attributes(pop)$call <- match.call() return(pop) @@ -510,7 +510,7 @@ oncoSimulIndiv <- function(fp, ) if(initSize < 1) stop("initSize < 1") - + if( (K < 1) && (model %in% c("McFL", "McFarlandLog") )) { stop("Using McFarland's model: K cannot be < 1") } ## if ( !(model %in% c("McFL", "McFarlandLog") )) { @@ -555,7 +555,7 @@ oncoSimulIndiv <- function(fp, ## keepEvery <- -9 if(is_null_na(keepEvery)) keepEvery <- -9 - + if( (keepEvery > 0) & (keepEvery < sampleEvery)) { keepEvery <- sampleEvery warning("setting keepEvery <- sampleEvery") @@ -575,16 +575,16 @@ oncoSimulIndiv <- function(fp, if(is_null_na(finalTime)) finalTime <- Inf if(is_null_na(sampleEvery)) stop("sampleEvery cannot be NULL or NA") - + if(!inherits(fp, "fitnessEffects")) { - if(any(unlist(lapply(list(fp, + if(any(unlist(lapply(list(fp, numPassengers, s, sh), is.null)))) { m <- paste("You are using the old poset format.", "You must specify all of poset, numPassengers", "s, and sh.") stop(m) - + } if(AND_DrvProbExit) { stop("The AND_DrvProbExit = TRUE setting is invalid", @@ -592,7 +592,7 @@ oncoSimulIndiv <- function(fp, } if(!is.null(muEF)) stop("Mutator effects cannot be specified with the old poset format.") - if( length(initMutant) > 0) + if( length(initMutant) > 0) warning("With the old poset format you can no longer use initMutant.", " The initMutant you passed will be ignored.") ## stop("With the old poset, initMutant can only take a single value.") @@ -610,40 +610,40 @@ oncoSimulIndiv <- function(fp, if(!is_null_na(detectionProb)) stop("detectionProb cannot be used in v.1 objects") ## if(message.v1) ## message("You are using the old poset format. Consider using the new one.") - - + + ## A simulation stops if cancer or finalTime appear, the first ## one. But if we set onlyCnacer = FALSE, we also accept simuls ## without cancer (or without anything) - + op <- try(oncoSimul.internal(poset = fp, ## restrict.table = rt, ## numGenes = numGenes, numPassengers = numPassengers, typeCBN = "CBN", birth = birth, s = s, - death = death, - mu = mu, - initSize = initSize, - sampleEvery = sampleEvery, - detectionSize = detectionSize, - finalTime = finalTime, - initSize_species = 2000, - initSize_iter = 500, - seed = seed, - verbosity = verbosity, - speciesFS = 10000, + death = death, + mu = mu, + initSize = initSize, + sampleEvery = sampleEvery, + detectionSize = detectionSize, + finalTime = finalTime, + initSize_species = 2000, + initSize_iter = 500, + seed = seed, + verbosity = verbosity, + speciesFS = 10000, ratioForce = 2, typeFitness = typeFitness, max.memory = max.memory, - mutationPropGrowth = mutationPropGrowth, - initMutant = -1, + mutationPropGrowth = mutationPropGrowth, + initMutant = -1, max.wall.time = max.wall.time, max.num.tries = max.num.tries, - keepEvery = keepEvery, - ## alpha = 0.0015, + keepEvery = keepEvery, + ## alpha = 0.0015, sh = sh, - K = K, + K = K, minDetectDrvCloneSz = minDetectDrvCloneSz, extraTime = extraTime, detectionDrivers = detectionDrivers, @@ -684,29 +684,29 @@ oncoSimulIndiv <- function(fp, if(AND_DrvProbExit) stop("It makes no sense to pass AND_DrvProbExit and a fixation list.") } - op <- try(nr_oncoSimul.internal(rFE = fp, + op <- try(nr_oncoSimul.internal(rFE = fp, birth = birth, - death = death, - mu = mu, - initSize = initSize, - sampleEvery = sampleEvery, - detectionSize = detectionSize, - finalTime = finalTime, - initSize_species = 2000, - initSize_iter = 500, - seed = seed, - verbosity = verbosity, - speciesFS = 10000, + death = death, + mu = mu, + initSize = initSize, + sampleEvery = sampleEvery, + detectionSize = detectionSize, + finalTime = finalTime, + initSize_species = 2000, + initSize_iter = 500, + seed = seed, + verbosity = verbosity, + speciesFS = 10000, ratioForce = 2, typeFitness = typeFitness, max.memory = max.memory, - mutationPropGrowth = mutationPropGrowth, - initMutant = initMutant, + mutationPropGrowth = mutationPropGrowth, + initMutant = initMutant, max.wall.time = max.wall.time, max.num.tries = max.num.tries, - keepEvery = keepEvery, - ## alpha = 0.0015, - K = K, + keepEvery = keepEvery, + ## alpha = 0.0015, + K = K, minDetectDrvCloneSz = minDetectDrvCloneSz, extraTime = extraTime, detectionDrivers = detectionDrivers, @@ -741,7 +741,7 @@ summary.oncosimul <- function(object, ...) { ## This should be present even in HittedWallTime and HittedMaxTries ## if those are not regarded as errors pbp <- ("pops.by.time" %in% names(object) ) - + if(object$other$UnrecoverExcept) { ## yes, when bailing out from ## except. can have just minimal ## content @@ -756,7 +756,7 @@ summary.oncosimul <- function(object, ...) { "NumDriversLargestPop", "TotalPresentDrivers", "FinalTime", "NumIter", "HittedWallTime", "HittedMaxTries")] - + tmp$errorMF <- object$other$errorMF tmp$minDMratio <- object$other$minDMratio tmp$minBMratio <- object$other$minBMratio @@ -816,7 +816,7 @@ summary.oncosimulpop <- function(object, ...) { ## So I need something more involved ## Figure out exactly what the summary of a NULL is sumnull <- summary(NULL) - + tmp <- lapply(object, summary) ## rm <- which(unlist(lapply(tmp, @@ -853,7 +853,7 @@ print.oncosimulpop <- function(x, ...) { plot.oncosimulpop <- function(x, ask = TRUE, - show = "drivers", + show = "drivers", type = ifelse(show == "genotypes", "stacked", "line"), col = "auto", @@ -953,7 +953,7 @@ plot.oncosimulpop <- function(x, ask = TRUE, ## else { ## ndr <- colSums(x$Genotypes[x$Drivers, , drop = FALSE]) ## } - + ## if(is.null(yl)) { ## if(log %in% c("y", "xy", "yx") ) ## yl <- c(1, max(apply(x$pops.by.time[, -1, drop = FALSE], 1, sum))) @@ -970,15 +970,15 @@ plot.oncosimulpop <- function(x, ask = TRUE, ## par(op) ## m1[c(3)] <- 0.2 ## op <- par(mar = m1) -## par(fig = c(0, 1, 0, 0.8), new = TRUE) +## par(fig = c(0, 1, 0, 0.8), new = TRUE) ## } ## if(plotClones) { ## plotClones(x, -## ndr = ndr, +## ndr = ndr, ## xlab = xlab, ## ylab = ylab, ## lty = ltyClone, -## col = col, +## col = col, ## ylim = yl, ## lwd = lwdClone, ## axes = FALSE, @@ -988,7 +988,7 @@ plot.oncosimulpop <- function(x, ask = TRUE, ## if(plotClones && plotDrivers) ## par(new = TRUE) - + ## if(plotDrivers){ ## plotDrivers0(x, ## ndr, @@ -997,18 +997,18 @@ plot.oncosimulpop <- function(x, ask = TRUE, ## xlab = "", ylab = "", ## lwd = lwdDrivers, ## lty = ltyDrivers, -## col = col, +## col = col, ## addtot = addtot, ## addtotlwd = addtotlwd, ## log = log, ylim = yl, ## ...) ## } - + ## } plot.oncosimul <- function(x, - show = "drivers", + show = "drivers", type = ifelse(show == "genotypes", "stacked", "line"), col = "auto", @@ -1039,68 +1039,86 @@ plot.oncosimul <- function(x, vrange = c(0.8, 1), breakSortColors = "oe", legend.ncols = "auto", + muller.type = "frequency", ... ) { - if(!(type %in% c("stacked", "stream", "line"))) + if(!(type %in% c("stacked", "stream", "line", "muller"))) stop("Type of plot unknown: it must be one of", - "stacked, stream or line") + "stacked, stream, line or muller") + + if (type == "muller") { + simulation <- x - if(!(show %in% c("genotypes", "drivers"))) + if (!(class(simulation)[2] %in% c("oncosimul2"))) { + stop("Type of object class must be:", " oncosimul2") + } + + if(is.na(simulation[["other"]][["PhylogDF"]][1,1])) { + stop("Object simulation must has property:", " other$PhylogDF") + } + + if(!(muller.type %in% c("frequency", "population"))) + stop("Type of muller.plot unknown: it must be one of", + "frequency or population") + + plotMuller(simulation, muller.type) + } else { + if(!(show %in% c("genotypes", "drivers"))) stop("show must be one of ", "genotypes or drivers") - if(!(breakSortColors %in% c("oe", "distave", "random"))) + if(!(breakSortColors %in% c("oe", "distave", "random"))) stop("breakSortColors must be one of ", "oe, distave, or random") - - colauto <- FALSE - if(col == "auto" && (type == "line") && (show == "drivers")) + + colauto <- FALSE + if(col == "auto" && (type == "line") && (show == "drivers")) col <- c(8, "orange", 6:1) - if(col == "auto" && (show == "genotypes")) { + if(col == "auto" && (show == "genotypes")) { ## For categorical data, I find Dark2, Paired, or Set1 to work best. col <- colorRampPalette(brewer.pal(8, "Dark2"))(ncol(x$pops.by.time) - 1) colauto <- TRUE - } - - if(show == "genotypes") { + } + + if(show == "genotypes") { plotDrivers <- FALSE plotClones <- TRUE - } - - if(thinData) + } + + if(thinData) x <- thin.pop.data(x, keep = thinData.keep, min.keep = thinData.min) - if(!is.null(xlim)) + if(!is.null(xlim)) x <- xlim.pop.data(x, xlim) - - ## For genotypes, ndr is now the genotypes. Actually, ndr is now just - ## a sequence 1:(ncol(y) - 1) - ## The user will want to change the colors, like a colorRamp, etc. Or - ## rainbow. + ## For genotypes, ndr is now the genotypes. Actually, ndr is now just + ## a sequence 1:(ncol(y) - 1) - ## genotypes and line, always call plotDrivers0 - if(show == "drivers") { + ## The user will want to change the colors, like a colorRamp, etc. Or + ## rainbow. + + ## genotypes and line, always call plotDrivers0 + if(show == "drivers") { if(!inherits(x, "oncosimul2")) - ndr <- colSums(x$Genotypes[1:x$NumDrivers, , drop = FALSE]) + ndr <- colSums(x$Genotypes[1:x$NumDrivers, , drop = FALSE]) else { - ndr <- colSums(x$Genotypes[x$Drivers, , drop = FALSE]) + ndr <- colSums(x$Genotypes[x$Drivers, , drop = FALSE]) } - } else { ## show we are showing genotypes + } else { ## show we are showing genotypes ndr <- 1:(ncol(x$pops.by.time) - 1) - } - - if((type == "line") && is.null(ylim)) { + } + + if((type == "line") && is.null(ylim)) { if(log %in% c("y", "xy", "yx") ) - ylim <- c(1, max(apply(x$pops.by.time[, -1, drop = FALSE], 1, sum))) + ylim <- c(1, max(apply(x$pops.by.time[, -1, drop = FALSE], 1, sum))) else - ylim <- c(0, max(apply(x$pops.by.time[, -1, drop = FALSE], 1, sum))) - } - if(plotDiversity) { + ylim <- c(0, max(apply(x$pops.by.time[, -1, drop = FALSE], 1, sum))) + } + if(plotDiversity) { oppd <- par(fig = c(0, 1, 0.8, 1)) m1 <- par()$mar m <- m1 @@ -1110,14 +1128,14 @@ plot.oncosimul <- function(x, par(op) m1[c(3)] <- 0.2 op <- par(mar = m1) - par(fig = c(0, 1, 0, 0.8), new = TRUE) - } - - ## Shows its history: plotClones makes plotDrivers0 unneeded with - ## stacked and stream plots. But now so with line plot. - ## When showing genotypes, plotDrivers0 with line only used for - ## showing the legend. - if(plotClones) { + par(fig = c(0, 1, 0, 0.8), new = TRUE) + } + + ## Shows its history: plotClones makes plotDrivers0 unneeded with + ## stacked and stream plots. But now so with line plot. + ## When showing genotypes, plotDrivers0 with line only used for + ## showing the legend. + if(plotClones) { plotClonesSt(x, ndr = ndr, show = show, @@ -1125,7 +1143,7 @@ plot.oncosimul <- function(x, log = log, lwd = lwdClone, lty = ifelse(show == "drivers", ltyClone, ltyDrivers), - col = col, + col = col, order.method = order.method, stream.center = stream.center, stream.frac.rand = stream.frac.rand, @@ -1143,12 +1161,12 @@ plot.oncosimul <- function(x, ylim = ylim, xlim = xlim, ...) - } + } - if(plotClones && plotDrivers && (type == "line")) + if(plotClones && plotDrivers && (type == "line")) par(new = TRUE) - - if( plotDrivers && (type == "line") ) { + + if( plotDrivers && (type == "line") ) { plotDrivers0(x, ndr, timescale = 1, @@ -1156,18 +1174,19 @@ plot.oncosimul <- function(x, xlab = "", ylab = "", lwd = lwdDrivers, lty = ltyDrivers, - col = col, + col = col, addtot = addtot, addtotlwd = addtotlwd, log = log, ylim = ylim, xlim = xlim, legend.ncols = legend.ncols, ...) - } - if(plotDiversity) { + } + if(plotDiversity) { par(oppd) + } } - + } plotClonesSt <- function(z, @@ -1205,10 +1224,10 @@ plotClonesSt <- function(z, ## change it, but it does not seem reasonable. ## But my original plotting code runs faster and is simpler if 0 are ## dealt as NAs (which also makes log transformations simpler). - - if(type %in% c("stacked", "stream") ) + + if(type %in% c("stacked", "stream", "muller") ) na.subs <- FALSE - + if(na.subs){ y[y == 0] <- NA } @@ -1277,7 +1296,7 @@ plotClonesSt <- function(z, if(grepl("x", log)) { x <- log10(x + 1) } - + if (type == "stacked") { plot.stacked2(x = x, y = y, @@ -1290,23 +1309,23 @@ plotClonesSt <- function(z, ylab = ylab, ylim = ylim, xlim = xlim, - ...) + ...) } else if (type == "stream") { - plot.stream2(x = x, - y = y, - order.method = order.method, - border = border, - lwd = lwdStackedStream, - col = cll$colors, - frac.rand = stream.frac.rand, - spar = stream.spar, - center = stream.center, - log = log, - xlab = xlab, - ylab = ylab, - ylim = ylim, - xlim = xlim, - ...) + plot.stream2(x = x, + y = y, + order.method = order.method, + border = border, + lwd = lwdStackedStream, + col = cll$colors, + frac.rand = stream.frac.rand, + spar = stream.spar, + center = stream.center, + log = log, + xlab = xlab, + ylab = ylab, + ylim = ylim, + xlim = xlim, + ...) } if(show == "drivers") { if(legend.ncols == "auto") { @@ -1328,7 +1347,7 @@ plotClonesSt <- function(z, ldrv <- z$GenotypesLabels } ldrv[ldrv == ""] <- "WT" - ldrv[ldrv == " _ "] <- "WT" + ldrv[ldrv == " _ "] <- "WT" if(legend.ncols == "auto") { if(length(ldrv) > 6) legend.ncols <- 2 else legend.ncols <- 1 @@ -1366,11 +1385,11 @@ myhsvcols <- function(ndr, ymax, srange = c(0.4, 1), ## - different clones with same number of drivers have "similar" colors ## I use hsv color specification as this seems the most reasonable. - + minor <- table(ndr) major <- length(unique(ndr)) ## yeah same as length(minor), but least ## surprise - + h <- seq(from = 0, to = 1, length.out = major + 1)[-1] ## do not keep similar hues next to each other if(breakSortColors == "oe") { @@ -1382,13 +1401,13 @@ myhsvcols <- function(ndr, ymax, srange = c(0.4, 1), } else if(breakSortColors == "random") { rr <- order(runif(length(h))) h <- h[rr] - } - + } + hh <- rep(h, minor) - - sr <- unlist(lapply(minor, function(x) + + sr <- unlist(lapply(minor, function(x) seq(from = srange[1], to = srange[2], length.out = x))) - sv <- unlist(lapply(minor, function(x) + sv <- unlist(lapply(minor, function(x) seq(from = vrange[1], to = vrange[2], length.out = x)) ) @@ -1466,7 +1485,7 @@ plotDrivers0 <- function(x, tot <- rowSums(y, na.rm = TRUE) lines(time, tot, col = "black", lty = 1, lwd = addtotlwd) } - + ## This will work even if under the weird case of a driver missing ldrv <- unlist(lapply(strsplit(colnames(y), "dr_", fixed = TRUE), function(x) x[2])) @@ -1537,7 +1556,7 @@ phylogClone <- function(x, N = 1, t = "last", keepEvents = TRUE) { if( (length(tG) == 1) && (tG == "")) { warning("There never was a descendant of WT") } - + df <- x$other$PhylogDF if(nrow(df) == 0) { warning("PhylogDF has 0 rows: no descendants of initMutant ever appeared. ", @@ -1581,11 +1600,11 @@ plotClonePhylog <- function(x, N = 1, t = "last", pc <- phylogClone(x, N, t, keepEvents) ## if(is.na(pc)) { ## ## This should not be reachable, as caught before - ## ## where we check for nrow of PhylogDF + ## ## where we check for nrow of PhylogDF ## warning("No clone phylogeny available. Exiting without plotting.") ## return(NULL) ## } - + l0 <- igraph::layout.reingold.tilford(pc$g) if(!timeEvents) { plot(pc$g, layout = l0) @@ -1602,7 +1621,7 @@ plotClonePhylog <- function(x, N = 1, t = "last", l1[dx, 1] <- runif(length(dx), ra[1], ra[2]) } } - plot(pc$g, layout = l1) + plot(pc$g, layout = l1) } if(returnGraph) return(pc$g) @@ -1647,7 +1666,7 @@ get.the.time.for.sample <- function(tmp, timeSample, popSizeSample) { } } else if (timeSample %in% c("uniform", "unif")) { candidate.time <- which(tmp$PerSampleStats[, 4] > 0) - + if (length(candidate.time) == 0) { warning(paste("There is not a single sampled time", "at which there are any mutants with drivers. ", @@ -1677,15 +1696,15 @@ get.mut.vector <- function(x, timeSample, typeSample, return(rep(NA, length(x$geneNames))) } the.time <- get.the.time.for.sample(x, timeSample, popSizeSample) - if(the.time < 0) { + if(the.time < 0) { return(rep(NA, nrow(x$Genotypes))) - } + } pop <- x$pops.by.time[the.time, -1] - + if(all(pop == 0)) { stop("You found a bug: this should never happen") } - + if(typeSample %in% c("wholeTumor", "whole")) { popSize <- x$PerSampleStats[the.time, 1] return( as.numeric((tcrossprod(pop, @@ -1721,15 +1740,15 @@ get.mut.vector <- function(x, timeSample, typeSample, ## return(rep(NA, length(x$geneNames))) ## } ## the.time <- get.the.time.for.sample(x, timeSample, popSizeSample) -## if(the.time < 0) { +## if(the.time < 0) { ## return(rep(NA, nrow(x$Genotypes))) -## } +## } ## pop <- x$pops.by.time[the.time, -1] - + ## if(all(pop == 0)) { ## stop("You found a bug: this should never happen") ## } - + ## if(typeSample %in% c("wholeTumor", "whole")) { ## popSize <- x$PerSampleStats[the.time, 1] ## return( as.numeric((tcrossprod(pop, @@ -1754,10 +1773,10 @@ get.mut.vector <- function(x, timeSample, typeSample, oncoSimul.internal <- function(poset, ## restrict.table, - numPassengers, + numPassengers, ## numGenes, typeCBN, - birth, + birth, s, death, mu, @@ -1778,7 +1797,7 @@ oncoSimul.internal <- function(poset, ## restrict.table, max.wall.time, keepEvery, alpha, - sh, + sh, K, ## endTimeEvery, detectionDrivers, @@ -1790,7 +1809,7 @@ oncoSimul.internal <- function(poset, ## restrict.table, extraTime) { ## the value of 20000, in megabytes, for max.memory sets a limit of ~ 20 GB - + ## if(keepEvery < sampleEvery) ## warning("setting keepEvery to sampleEvery") @@ -1831,7 +1850,7 @@ oncoSimul.internal <- function(poset, ## restrict.table, stop("BAIL OUT NOW: max(restrict.table[, 1]) != numDrivers") if(numDrivers > numGenes) stop("BAIL OUT NOW: numDrivers > numGenes") - + non.dep.drivers <- restrict.table[which(restrict.table[, 2] == 0), 1] @@ -1864,13 +1883,13 @@ oncoSimul.internal <- function(poset, ## restrict.table, ## transpose the table rtC <- convertRestrictTable(restrict.table) - + return(c( BNB_Algo5(restrictTable = rtC, numDrivers = numDrivers, numGenes = numGenes, typeCBN_= typeCBN, - s = s, + s = s, death = death, mu = mu, initSize = initSize, @@ -1906,7 +1925,7 @@ oncoSimul.internal <- function(poset, ## restrict.table, OncoSimulWide2Long <- function(x) { ## Put data in long format, for ggplot et al - + if(!inherits(x, "oncosimul2")) { ndr <- colSums(x$Genotypes[1:x$NumDrivers, , drop = FALSE]) genotLabels <- genotypeLabel(x) @@ -1918,7 +1937,7 @@ OncoSimulWide2Long <- function(x) { genotLabels[genotLabels == " _ "] <- "WT" y <- x$pops.by.time[, 2:ncol(x$pops.by.time), drop = FALSE] y[y == 0] <- NA - + oo <- order(ndr) y <- y[, oo, drop = FALSE] ndr <- ndr[oo] @@ -1949,7 +1968,7 @@ OncoSimulWide2Long <- function(x) { ## muts.by.time <- tmp$pops.by.time ## } ## return(muts.by.time) -## } +## } create.drivers.by.time <- function(tmp, ndr) { @@ -1970,7 +1989,7 @@ create.drivers.by.time <- function(tmp, ndr) { function(x) tapply(x, CountNumDrivers, - sum)))) + sum)))) } else { drivers.by.time <- cbind(tmp$pops.by.time[, c(1), drop = FALSE] , @@ -1986,7 +2005,7 @@ create.drivers.by.time <- function(tmp, ndr) { drivers.by.time <- NULL } return(drivers.by.time) -} +} @@ -2044,6 +2063,47 @@ is_null_na <- function(x) { } } +plotMuller <- function(simulation, muller.type) { + population <- simulation[["pops.by.time"]] + num_of_clones <- simulation[["NumClones"]] + time_points = population[,1] + genotypesLabels = simulation[["GenotypesLabels"]] + convert <- function(x) as.numeric(as.character(x)) + + # Parse simulation's population by time table into a ggmuller friendly + # population by time table + data <- as.vector(t(population[,2:(num_of_clones + 1)])) + dimnames <- list(cloneid = c(1:num_of_clones), time = time_points) + mat <- matrix(data, ncol = length(time_points), nrow = num_of_clones, dimnames = dimnames) + pop <- as.data.frame(as.table(mat)) + pop <- t(apply(pop, 1, convert)) + colnames(pop) <- c("Identity", "Generation", "Population") + pop <- pop[,c(2, 1, 3)] + pop <- as.data.frame(pop) + + # Parse phylogenetic tree from simulation into ggmuller format + phyloTree <- simulation[["other"]][["PhylogDF"]] + phyloTree <- as.data.frame(phyloTree) + phyloTree <- phyloTree[,-3] + phyloTree <- phyloTree[,-3] + phyloTree <- phyloTree[!duplicated(phyloTree), ] + edges <- t(apply(phyloTree, 1, function(x) match(x, genotypesLabels))) + edges <- as.data.frame(edges) + edges <- na.omit(edges) + edges <- t(apply(edges, 1, convert)) + rownames(edges) <- 1:nrow(edges) + colnames(edges) <- c("Parent", "Identity") + edges <- as.data.frame(edges) + + Muller_df <- get_Muller_df(edges, pop) + + if(muller.type == "population") { + Muller_pop_plot(Muller_df, add_legend = TRUE) + } else { + Muller_plot(Muller_df, add_legend = TRUE) + } +} + ## Not used anymore, but left here in case they become useful. ## Expected numbers at equilibrium under McFarland's @@ -2065,7 +2125,7 @@ is_null_na <- function(x) { ## mcflEv <- function(p, s, initSize) { ## ## expects vectors for p and s ## K <- initSize/(exp(1) - 1) - + ## ## Expected number at equilibrium ## return( K * (exp(prod((1 + s)^p)) - 1)) ## } @@ -2089,7 +2149,7 @@ is_null_na <- function(x) { ## } ## plotSimpson <- function(z) { - + ## h <- apply(z$pops.by.time[, 2:ncol(z$pops.by.time), drop = FALSE], ## 1, shannonI) ## plot(x = z$pops.by.time[, 1], @@ -2105,7 +2165,7 @@ is_null_na <- function(x) { ## ## drivers are plotted last ## y <- z$pops.by.time[, 2:ncol(z$pops.by.time), drop = FALSE] - + ## if(na.subs){ ## y[y == 0] <- NA ## } @@ -2151,8 +2211,8 @@ is_null_na <- function(x) { ## num.genes <- max(poset) - 1 ## as root is not a gene ## genotype <-t(c(1, rep(NA, num.genes))) ## colnames(genotype) <- as.character(0:num.genes) - - + + ## poset$runif <- runif(nrow(poset)) ## ## this.relation.prob.OK could be done outside, but having it inside ## ## the loop would allow to use different thresholds for different @@ -2160,15 +2220,15 @@ is_null_na <- function(x) { ## for (i in (1:nrow(poset))) { ## child <- poset[i, 2] ## this.relation.prob.OK <- as.numeric(poset[i, "runif"] > p) -## the.parent <- genotype[ poset[i, 1] ] ## it's the value of parent in genotype. +## the.parent <- genotype[ poset[i, 1] ] ## it's the value of parent in genotype. ## if (is.na(genotype[child])){ -## genotype[child] <- this.relation.prob.OK * the.parent +## genotype[child] <- this.relation.prob.OK * the.parent ## } ## else ## genotype[child] <- genotype[child]*(this.relation.prob.OK * the.parent) ## } ## ## } - + ## return(genotype) ## } @@ -2184,13 +2244,13 @@ is_null_na <- function(x) { ## get.mut.vector.whole <- function(tmp, timeSample = "last", threshold = 0.5) { ## ## Obtain, from results from a simulation run, the vector ## ## of 0/1 corresponding to each gene. - + ## ## threshold is the min. proportion for a mutation to be detected ## ## We are doing whole tumor sampling here, as in Sprouffske ## ## timeSample: do we sample at end, or at a time point, chosen ## ## randomly, from all those with at least one driver? - + ## if(timeSample == "last") { ## if(tmp$TotalPopSize == 0) ## warning(paste("Final population size is 0.", @@ -2201,7 +2261,7 @@ is_null_na <- function(x) { ## tmp$Genotypes)/tmp$TotalPopSize) > threshold)) ## } else if (timeSample %in% c("uniform", "unif")) { ## candidate.time <- which(tmp$PerSampleStats[, 4] > 0) - + ## if (length(candidate.time) == 0) { ## warning(paste("There is not a single sampled time", ## "at which there are any mutants.", @@ -2231,19 +2291,19 @@ is_null_na <- function(x) { ## warning(paste("There are no clones with drivers at any time point.", ## "No uniform sampling possible.", ## "You will get a vector of NAs.")) -## return(rep(NA, nrow(tmp$Genotypes))) +## return(rep(NA, nrow(tmp$Genotypes))) ## } ## get.mut.vector.singlecell <- function(tmp, timeSample = "last") { ## ## No threshold, as single cell. ## ## timeSample: do we sample at end, or at a time point, chosen ## ## randomly, from all those with at least one driver? - + ## if(timeSample == "last") { ## the.time <- nrow(tmp$pops.by.time) ## } else if (timeSample %in% c("uniform", "unif")) { ## candidate.time <- which(tmp$PerSampleStats[, 4] > 0) - + ## if (length(candidate.time) == 0) { ## warning(paste("There is not a single sampled time", ## "at which there are any mutants.", @@ -2314,6 +2374,6 @@ is_null_na <- function(x) { ## l1[dx, 1] <- runif(length(dx), ra[1], ra[2]) ## } ## } -## plot(g, layout = l1) +## plot(g, layout = l1) ## } ## } diff --git a/OncoSimulR/R/stacked-plots.R b/OncoSimulR/R/stacked-plots.R index 43571f3e..310158a6 100644 --- a/OncoSimulR/R/stacked-plots.R +++ b/OncoSimulR/R/stacked-plots.R @@ -21,19 +21,19 @@ -## plot.stream makes a "stream plot" where each y series is plotted +## plot.stream makes a "stream plot" where each y series is plotted ## as stacked filled polygons on alternating sides of a baseline. ## Arguments include: ## 'x' - a vector of values ## 'y' - a matrix of data series (columns) corresponding to x -## 'order.method' = c("as.is", "max", "first") +## 'order.method' = c("as.is", "max", "first") ## "as.is" - plot in order of y column ## "max" - plot in order of when each y series reaches maximum value ## "first" - plot in order of when each y series first value > 0 ## 'center' - if TRUE, the stacked polygons will be centered so that the middle, -## i.e. baseline ("g0"), of the stream is approximately equal to zero. -## Centering is done before the addition of random wiggle to the baseline. +## i.e. baseline ("g0"), of the stream is approximately equal to zero. +## Centering is done before the addition of random wiggle to the baseline. ## 'frac.rand' - fraction of the overall data "stream" range used to define the range of ## random wiggle (uniform distrubution) to be added to the baseline 'g0' ## 'spar' - setting for smooth.spline function to make a smoothed version of baseline "g0" @@ -43,11 +43,11 @@ ## '...' - other plot arguments plot.stream2 <- function( - x, y, + x, y, order.method = "as.is", frac.rand=0.1, spar=0.2, center=TRUE, - ylab="", xlab="", - border = NULL, lwd=1, + ylab="", xlab="", + border = NULL, lwd=1, col=rainbow(length(y[1,])), ylim=NULL, log = "", @@ -60,7 +60,7 @@ plot.stream2 <- function( border <- as.vector(matrix(border, nrow=ncol(y), ncol=1)) col <- as.vector(matrix(col, nrow=ncol(y), ncol=1)) lwd <- as.vector(matrix(lwd, nrow=ncol(y), ncol=1)) - + if(order.method == "max") { ord <- order(apply(y, 2, which.max)) y <- y[, ord] @@ -98,7 +98,7 @@ plot.stream2 <- function( mid <- apply(outer.lims, 1, function(r) mean(c(max(r, na.rm=TRUE), min(r, na.rm=TRUE)), na.rm=TRUE)) - + ## center and wiggle if(center) { g0 <- -mid + runif(length(x), @@ -133,15 +133,13 @@ plot.stream2 <- function( } } - - ## plot.stacked makes a stacked plot where each y series is plotted on top ## of the each other using filled polygons ## Arguments include: ## 'x' - a vector of values ## 'y' - a matrix of data series (columns) corresponding to x -## 'order.method' = c("as.is", "max", "first") +## 'order.method' = c("as.is", "max", "first") ## "as.is" - plot in order of y column ## "max" - plot in order of when each y series reaches maximum value ## "first" - plot in order of when each y series first value > 0 @@ -151,10 +149,10 @@ plot.stream2 <- function( ## '...' - other plot arguments plot.stacked2 <- function( - x, y, + x, y, order.method = "as.is", - ylab="", xlab="", - border = NULL, lwd=1, + ylab="", xlab="", + border = NULL, lwd=1, col=rainbow(length(y[1,])), ylim=NULL, log = "", diff --git a/OncoSimulR/man/plot.oncosimul.Rd b/OncoSimulR/man/plot.oncosimul.Rd old mode 100644 new mode 100755 index e7933cc7..0dd9f065 --- a/OncoSimulR/man/plot.oncosimul.Rd +++ b/OncoSimulR/man/plot.oncosimul.Rd @@ -16,7 +16,7 @@ and clones with different number of drivers are plotted in different colours. Plots can alternatively display genotypes instead of drivers. - Plots available are line plots, stacked area, and stream plots. + Plots available are line plots, stacked area, stream plots and Muller plots. } @@ -53,7 +53,9 @@ srange = c(0.4, 1), vrange = c(0.8, 1), breakSortColors = "oe", - legend.ncols = "auto", ...) + legend.ncols = "auto", + muller.type = "frequency", + ...) \method{plot}{oncosimulpop}(x, ask = TRUE, @@ -88,6 +90,7 @@ vrange = c(0.8, 1), breakSortColors = "oe", legend.ncols = "auto", + muller.type = "frequency", ...) } @@ -109,22 +112,29 @@ will be an unmanageable mess). The default is "drivers". } - \item{type}{One of "line", "stacked", "stream". + \item{type}{One of "line", "stacked", "stream", "muller". If "line", you are shown lines for each genotype or clone. This means that to get an idea of the total population size you need to use \code{plotDrivers = TRUE} with \code{addtot = TRUE}, or do the visual calculation in your head. - If "stacked" a stacked area plot. If "stream" a stream plot. Since - these stack areas, you immediately get the total population. But that - also means you cannot use \code{log}. + If "stacked" a stacked area plot. If "stream" a stream plot. If "muller" + a Muller plot. Since these stack areas, you immediately get the total + population. But that also means you cannot use \code{log}. The default is to use "line" for \code{show = "drivers"} and "stacked" for \code{show = "genotypes"}. } + \item{muller.type}{One of "frequency", "population". + + If "frequency", it shows the frecuency of each clone. If "population"", + shows changes in population sizes. + + The default is to use "frequecy". + } \item{col}{ Colour of the lines/areas. For \code{show = "drivers"} each type of clone (where type is defined by number of drivers) has @@ -306,7 +316,6 @@ entries, and two for more than six. } - \item{\dots}{ Other arguments passed to \code{plots}. For instance, \code{main}. diff --git a/OncoSimulR/src-i386/BNB_nr.cpp b/OncoSimulR/src-i386/BNB_nr.cpp new file mode 100644 index 00000000..86b76320 --- /dev/null +++ b/OncoSimulR/src-i386/BNB_nr.cpp @@ -0,0 +1,2527 @@ +// Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte + + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + + + +// #include "OncoSimul.h" +// #include "randutils.h" //Nope, until we have gcc-4.8 in Win; full C++11 +#include "debug_common.h" +#include "common_classes.h" +#include "bnb_common.h" +#include "new_restrict.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace Rcpp; +using std::vector; + +// To track if mutation is really much smaller than birth/death +#define MIN_RATIO_MUTS_NR +#ifdef MIN_RATIO_MUTS_NR +// There is really no need for these to be globals? +// Unless I wanted to use them inside some function. So leave as globals. +double g_min_birth_mut_ratio_nr = DBL_MAX; +double g_min_death_mut_ratio_nr = DBL_MAX; +double g_tmp1_nr = DBL_MAX; +#endif + + +void nr_fitness(spParamsP& tmpP, + const spParamsP& parentP, + const Genotype& ge, + const fitnessEffectsAll& F, + const TypeModel typeModel) { + // const double& genTime, + // const double& adjust_fitness_B, + // const double& adjust_fitness_MF) { + + // We want a way to signal immediate non-viability of a clone. For + // "birth-based" models that happens when any s = -1, as the fitness is + // 0. By setting birth = 0.0 we ensure this clone does not get added and + // we never reach into algo2, etc, leading to numerical problems. + + // With Bozic models, which are "death-based", it is different. For + // bozic2, birth is bounded, so any death > 2 would lead to birth < + // 0. For bozic1, deaths of around 50 lead to numerical issues. The + // general rule is: set those mutations to -inf, so prodDeathFitness + // returns an inf for death, and that is recognized as "no + // viability" (anything with death > 99) + + // The ones often used are bozic1, exp, mcfarlandlog + + if(typeModel == TypeModel::bozic1) { + tmpP.death = prodDeathFitness(evalGenotypeFitness(ge, F)); + if( tmpP.death > 99) { + tmpP.birth = 0.0; + } else { + tmpP.birth = 1.0; + } + // } else if (typeModel == TypeModel::bozic2) { + // double pp = prodDeathFitness(evalGenotypeFitness(ge, F)); + // tmpP.birth = std::max(0.0, (1.0/genTime) * (1.0 - 0.5 * pp )); + // tmpP.death = (0.5/genTime) * pp; + } else { + double fitness = prodFitness(evalGenotypeFitness(ge, F)); + if( fitness <= 0.0) { + tmpP.absfitness = 0.0; + tmpP.death = 1.0; + tmpP.birth = 0.0; + } else{ + // Set appropriate defaults and change only as needed + tmpP.death = parentP.death; + tmpP.absfitness = parentP.absfitness; + tmpP.birth = fitness; + // exp, mcfarland, and mcfarlandlog as above. Next are the two exceptions. + // if(typeModel == TypeModel::beerenwinkel) { + // tmpP.absfitness = fitness; + // tmpP.birth = adjust_fitness_B * tmpP.absfitness; + // } else if(typeModel == TypeModel::mcfarland0) { + // tmpP.absfitness = fitness; + // tmpP.birth = adjust_fitness_MF * tmpP.absfitness; + // } + } + } + // Exp and McFarland and McFarlandlog are also like Datta et al., 2013 + // An additional driver gene mutation increases a cell’s fitness by a + // factor of (1+sd), whereas an additional housekeeper gene mutation + // decreases fitness by a factor of (1-sh) and the effect of multiple + // mutations is multiplicative +} + + +// is this really any faster than the one below? +inline void new_sp_v(unsigned int& sp, + const Genotype& newGenotype, + const std::vector Genotypes) { + sp = 0; + for(sp = 0; sp < Genotypes.size(); ++sp) { + if( newGenotype == Genotypes[sp] ) + break; + } +} + +inline unsigned int new_sp(const Genotype& newGenotype, + const std::vector Genotypes) { + for(unsigned int sp = 0; sp < Genotypes.size(); ++sp) { + if( newGenotype == Genotypes[sp] ) { + return sp; + } + } + return Genotypes.size(); +} + +void remove_zero_sp_nr(std::vector& sp_to_remove, + std::vector& Genotypes, + std::vector& popParams, + std::multimap& mapTimes) { + std::vector::iterator popParams_begin = popParams.begin(); + std::vector::iterator Genotypes_begin = Genotypes.begin(); + std::vector::reverse_iterator r = sp_to_remove.rbegin(); + int remove_this; + while(r != sp_to_remove.rend() ) { + remove_this = *r; + mapTimes.erase(popParams[remove_this].pv); + popParams.erase(popParams_begin + remove_this); + Genotypes.erase(Genotypes_begin + remove_this); + ++r; + } +} + + +inline void driverCounts(int& maxNumDrivers, + int& totalPresentDrivers, + std::vector& countByDriver, + std::vector& presentDrivers, + Rcpp::IntegerMatrix& returnGenotypes, + const vector& drv){ + // Fill up the "countByDriver" table, how many genotypes each driver is + // present. Return the maximum number of mutated drivers in any + // genotype, the vector with just the present drivers, and the total + // number of present drivers. + + // We used to do count_NumDrivers and then whichDrivers + maxNumDrivers = 0; + int tmpdr = 0; + int driver_indx = 0; // the index in the driver table + for(int j = 0; j < returnGenotypes.ncol(); ++j) { + tmpdr = 0; + driver_indx = 0; + for(int i : drv) { + tmpdr += returnGenotypes(i - 1, j); + countByDriver[driver_indx] += returnGenotypes(i - 1, j); + ++driver_indx; + } + if(tmpdr > maxNumDrivers) maxNumDrivers = tmpdr; + } + if(returnGenotypes.ncol() > 0) { + STOPASSERT(driver_indx == static_cast( countByDriver.size())); + } else { + STOPASSERT(driver_indx <= static_cast( countByDriver.size())); + } + for(size_t i = 0; i < countByDriver.size(); ++i) { + if(countByDriver[i] > 0) { + presentDrivers.push_back(i + 1); + ++totalPresentDrivers; + } + } +} + + + +// FIXME: why not keep the number of present drivers in the genotype? We +// call often the getGenotypeDrivers(ge, drv).size() + + +void nr_totPopSize_and_fill_out_crude_P(int& outNS_i, + double& totPopSize, + double& lastStoredSample, + std::vector& genot_out, + //std::vector& sp_id_out, + std::vector& popSizes_out, + std::vector& index_out, + std::vector& time_out, + std::vector& sampleTotPopSize, + std::vector& sampleLargestPopSize, + std::vector& sampleMaxNDr, + std::vector& sampleNDrLargestPop, + bool& simulsDone, + bool& reachDetection, + int& lastMaxDr, + double& done_at, + const std::vector& Genotypes, + const std::vector& popParams, + const double& currentTime, + const double& keepEvery, + const double& detectionSize, + const double& finalTime, + // const double& endTimeEvery, + const int& detectionDrivers, + const int& verbosity, + const double& minDetectDrvCloneSz, + const double& extraTime, + const vector& drv, + const double& cPDetect, + const double& PDBaseline, + const double& checkSizePEvery, + double& nextCheckSizeP, + std::mt19937& ran_gen, + const bool& AND_DrvProbExit, + const std::vector >& fixation_l, + const double& fixation_tolerance, + const int& min_successive_fixation, + const double& fixation_min_size, + int& num_successive_fixation, + POM& pom, + const std::map& intName, + const fitness_as_genes& genesInFitness, + const double& fatalPopSize = 1e15 + ) { + // Fill out, but also compute totPopSize + // and return sample summaries for popsize, drivers. + + // This determines if we are done or not by checking popSize, number of + // drivers, etc + + // static int lastMaxDr = 0; // preserves value across calls to Algo5 from R. + // so can not use it. + bool storeThis = false; + bool checkSizePNow = false; + totPopSize = 0.0; + + // this could all be part of popSize_over_m_dr, with a better name + int tmp_ndr = 0; + int max_ndr = 0; + double popSizeOverDDr = 0.0; + + for(size_t i = 0; i < popParams.size(); ++i) { + totPopSize += popParams[i].popSize; + tmp_ndr = getGenotypeDrivers(Genotypes[i], drv).size(); + if(tmp_ndr > max_ndr) max_ndr = tmp_ndr; + if(tmp_ndr >= detectionDrivers) popSizeOverDDr += popParams[i].popSize; + } + lastMaxDr = max_ndr; + + // Until fixation done here. Recall we use an OR operation for exiting + // below. Could be added to loop above. + // And we call allGenesinGenotype also above, inside getGenotypeDrivers. + // So room for speed ups? + + // Since we convert each genotype to a sorted allGenesinGenotype, iterate + // over that first. Add that pop size if the combination is present in genotype. + bool fixated = false; + if(totPopSize > 0) { // Avoid silly things + if( fixation_l.size() ) { + std::vector popSize_fixation(fixation_l.size()); + for(size_t i = 0; i < popParams.size(); ++i) { + std::vector thisg = allGenesinGenotype(Genotypes[i]); + for(size_t fc = 0; fc != popSize_fixation.size(); ++fc) { + // Yes, fixation_l is sorted in R. + // if fixation_l[fc] starts with a -9, we are asking + // for exact genotype equality + if(fixation_l[fc][0] == -9) { + // // exact genotype identity? + std::vector this_fix(fixation_l[fc].begin() + 1, + fixation_l[fc].end()); + if(thisg == this_fix) { + popSize_fixation[fc] = popParams[i].popSize; + } + } else { + // gene combination in fixation element present in genotype? + if(std::includes(thisg.begin(), thisg.end(), + fixation_l[fc].begin(), fixation_l[fc].end()) ) { + popSize_fixation[fc] += popParams[i].popSize; + } + } + } + } + // Any fixated? But avoid trivial of totPopSize of 0! + // Now check of > 0 is redundant as we check totPopSize > 0 + // FIXME do we want tolerance around that value? + double max_popSize_fixation = + *std::max_element(popSize_fixation.begin(), popSize_fixation.end()); + if( (max_popSize_fixation >= fixation_min_size ) && + (max_popSize_fixation >= (totPopSize * (1 - fixation_tolerance) )) ) { + ++num_successive_fixation; + // DP1("increased num_successive_fixation"); + if( num_successive_fixation >= min_successive_fixation) fixated = true; + } else { + // DP1("zeroed num_successive_fixation"); + num_successive_fixation = 0; + } + } + } + + // // DEBUG + // if(fixated) { + // // print fixation_l + // // print popSize_fixation + // // print totPopSize + // DP1("popSize_fixation"); + // for(size_t fc = 0; fc != popSize_fixation.size(); ++fc) { + // DP2(fc); + // DP2(popSize_fixation[fc]); + // } + // DP2(totPopSize); + + // } + + if (keepEvery < 0) { + storeThis = false; + } else if( currentTime >= (lastStoredSample + keepEvery) ) { + storeThis = true; + } + + if( (totPopSize <= 0.0) || (currentTime >= finalTime) ) { + simulsDone = true; + } + + + // FIXME + // this is the usual exit condition + // (totPopSize >= detectionSize) || + // ( (lastMaxDr >= detectionDrivers) && + // (popSizeOverDDr >= minDetectDrvCloneSz) + + // Now add the prob. of exiting. + + // Doing this is cheaper than drawing unnecessary runifs. + // Equality, below, leads to suprises with floating point arith. + + // Operates the same as we do with keepEvery, but here we + // compute the jump in each accepted sample. And here we use >, not + // >= + if(currentTime > nextCheckSizeP) { + checkSizePNow = true; + nextCheckSizeP = currentTime + checkSizePEvery; + // Nope; minimal jump can be smaller than checkSizePEvery + // nextCheckSizeP += checkSizePEvery; + } else { + checkSizePNow = false; + } + + // We do not verify that conditions for exiting are also satisfied + // at the exit time when extraTime > 0. We could do that, + // checking again for the conditions (or the reasonable conditions, so + // probably not detectSizeP). For instance, with fixated. + + // For fixated in particular, note that we evaluate fixation always, but + // we might be exiting when there is no longer fixation. But the logic + // with fixation is probably to use as large a min_successive_fixation + // as desired and no extraTime. + + // Probably would not need to check lastMaxDr and popSizeOverDDr + // as those should never decrease. Really?? FIXME + + + if(AND_DrvProbExit) { + // The AND of detectionProb and drivers + // fixated plays no role here, and cannot be passed from R + if(extraTime > 0) { + if(done_at < 0) { + if( (lastMaxDr >= detectionDrivers) && + (popSizeOverDDr >= minDetectDrvCloneSz) && + checkSizePNow && + detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen) ) { + done_at = currentTime + extraTime; + } + } else if (currentTime >= done_at) { + simulsDone = true; + reachDetection = true; + } + } else if( (lastMaxDr >= detectionDrivers) && + (popSizeOverDDr >= minDetectDrvCloneSz) && + checkSizePNow && + detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen) ) { + simulsDone = true; + reachDetection = true; + } + } else { + // The usual OR mechanism of each option + if(extraTime > 0) { + if(done_at < 0) { + if( (fixated) || + (totPopSize >= detectionSize) || + ( (lastMaxDr >= detectionDrivers) && + (popSizeOverDDr >= minDetectDrvCloneSz) ) || + ( checkSizePNow && + detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen))) { + done_at = currentTime + extraTime; + } + } else if (currentTime >= done_at) { + // if(fixated) { + simulsDone = true; + reachDetection = true; + // } else { + // done_at = -1; + // } + } + } else if( (fixated) || + (totPopSize >= detectionSize) || + ( (lastMaxDr >= detectionDrivers) && + (popSizeOverDDr >= minDetectDrvCloneSz) ) || + ( checkSizePNow && + detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen)) ) { + simulsDone = true; + reachDetection = true; + } + } + + + // if( checkSizePNow && (lastMaxDr >= detectionDrivers) && + // detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen) ) { + // simulsDone = true; + // reachDetection = true; + // } + + + if(totPopSize >= fatalPopSize) { + Rcpp::Rcout << "\n\totPopSize > " << fatalPopSize + <<". You are likely to loose precision and run into numerical issues\n"; + } + + if(simulsDone) + storeThis = true; + + + + + // Reuse some info for POM + + if( storeThis ) { + lastStoredSample = currentTime; + outNS_i++; + int ndr_lp = 0; + double l_pop_s = 0.0; + int largest_clone = -99; + + time_out.push_back(currentTime); + + for(size_t i = 0; i < popParams.size(); ++i) { + genot_out.push_back(Genotypes[i]); + popSizes_out.push_back(popParams[i].popSize); + index_out.push_back(outNS_i); + + if(popParams[i].popSize > l_pop_s) { + l_pop_s = popParams[i].popSize; + ndr_lp = getGenotypeDrivers(Genotypes[i], drv).size(); + largest_clone = i; + } + } + sampleTotPopSize.push_back(totPopSize); + sampleLargestPopSize.push_back(l_pop_s); + sampleMaxNDr.push_back(max_ndr); + sampleNDrLargestPop.push_back(ndr_lp); + + if(l_pop_s > 0) { + if (largest_clone < 0) + throw std::logic_error("largest_clone < 0"); + addToPOM(pom, Genotypes[largest_clone], intName, genesInFitness); + } else { + addToPOM(pom, "_EXTINCTION_"); + } + } + + if( !std::isfinite(totPopSize) ) { + throw std::range_error("totPopSize not finite"); + } + if( std::isnan(totPopSize) ) { + throw std::range_error("totPopSize is NaN"); + } + // For POM + if( !storeThis ) { + double l_pop_s = 0.0; + int largest_clone = -99; + for(size_t i = 0; i < popParams.size(); ++i) { + if(popParams[i].popSize > l_pop_s) { + l_pop_s = popParams[i].popSize; + largest_clone = i; + } + } + if(l_pop_s > 0) { + if (largest_clone < 0) + throw std::logic_error("largest_clone < 0"); + addToPOM(pom, Genotypes[largest_clone], intName, genesInFitness); + } else { + addToPOM(pom, "_EXTINCTION_"); + } + } +} + +// FIXME: I might want to return the actual drivers in each period +// and the actual drivers in the population with largest popsize +// Something like what we do now with whichDrivers +// and count_NumDrivers + + + +inline void nr_reshape_to_outNS(Rcpp::NumericMatrix& outNS, + const vector >& uniqueGenotV, + const vector >& genot_out_v, + const vector& popSizes_out, + const vector& index_out, + const vector& time_out){ + + vector >::const_iterator fbeg = uniqueGenotV.begin(); + vector >::const_iterator fend = uniqueGenotV.end(); + + int column; + + for(size_t i = 0; i < genot_out_v.size(); ++i) { + column = std::distance(fbeg, lower_bound(fbeg, fend, genot_out_v[i]) ); + outNS(index_out[i], column + 1) = popSizes_out[i]; + } + + for(size_t j = 0; j < time_out.size(); ++j) + outNS(j, 0) = time_out[j]; +} + + +Rcpp::NumericMatrix create_outNS(const vector >& uniqueGenotypes, + const vector >& genot_out_v, + const vector& popSizes_out, + const vector& index_out, + const vector& time_out, + const int outNS_i, const int maxram) { + // The out.ns in R code; holder of popSizes over time + // The first row is time, then the genotypes (in column major) + // here("after uniqueGenotypes_to_vector"); + + int outNS_r, outNS_c, create_outNS; + if( ( (uniqueGenotypes.size() + 1) * (outNS_i + 1) ) > ( pow(2, 31) - 1 ) ) { + Rcpp::Rcout << "\nWARNING: Return outNS object > 2^31 - 1. Not created.\n"; + outNS_r = 1; + outNS_c = 1; + create_outNS = 0; + } else if ( + static_cast((uniqueGenotypes.size()+1) * (outNS_i+1)) * 8 > + (maxram * (1024*1024) ) ) { + Rcpp::Rcout << "\nWARNING: Return outNS object > maxram. Not created.\n"; + outNS_r = 1; + outNS_c = 1; + create_outNS = 0; + } else { + outNS_r = outNS_i + 1; + outNS_c = uniqueGenotypes.size() + 1; + create_outNS = 1; + } + Rcpp::NumericMatrix outNS(outNS_r, outNS_c); + if(create_outNS) { + nr_reshape_to_outNS(outNS, uniqueGenotypes, + genot_out_v, + popSizes_out, + index_out, time_out); + + } else { + outNS(0, 0) = -99; + } + return outNS; +} + + + +// FIXME: when creating the 0/1, collapse those that are the same + + +vector< vector > uniqueGenot_vector(vector >& genot_out) { + // From genot_out we want the unique genotypes, but each as a single + // vector. Convert to the vector, then use a set to give unique sorted + // vector. + std::set > uniqueGenotypes_nr(genot_out.begin(), + genot_out.end()); + std::vector > uniqueGenotypes_vector_nr (uniqueGenotypes_nr.begin(), + uniqueGenotypes_nr.end()); + return uniqueGenotypes_vector_nr; +} + + + +std::vector > genot_to_vectorg(const std::vector& go) { + std::vector > go_l; + std::transform(go.begin(), go.end(), back_inserter(go_l), genotypeSingleVector); + return go_l; +} + + + +std::string driversToNameString(const std::vector& presentDrivers, + const std::map& intName) { + std::string strDrivers; + std::string comma = ""; + for(auto const &g : presentDrivers) { + strDrivers += (comma + intName.at(g)); + comma = ", "; + } + return strDrivers; +} + +// No longer used. +// std::string genotypeToIntString(const std::vector& genotypeV, +// const fitness_as_genes& fg) { + +// // The genotype vectors are returned as a string of ints. + +// std::string strGenotype; + +// std::vector order_int; +// std::vector rest_int; + +// for(auto const &g : genotypeV) { +// if( binary_search(fg.orderG.begin(), fg.orderG.end(), g)) { +// order_int.push_back(g); +// } else { +// rest_int.push_back(g); +// } +// } + +// std::string order_sep = "_"; +// std::string order_part; +// std::string rest; +// std::string comma = ""; + + +// for(auto const &g : order_int) { +// #ifdef _WIN32 +// order_part += (comma + SSTR(g)); +// #endif +// #ifndef _WIN32 +// order_part += (comma + std::to_string(g)); +// #endif +// comma = ", "; +// } +// comma = ""; +// for(auto const &g : rest_int) { +// #ifdef _WIN32 +// rest += (comma + SSTR(g)); +// #endif +// #ifndef _WIN32 +// rest += (comma + std::to_string(g)); +// #endif +// comma = ", "; +// } +// if(fg.orderG.size()) { +// strGenotype = order_part + order_sep + rest; +// } else { +// strGenotype = rest; +// } +// return strGenotype; +// } + + +std::string genotypeToNameString(const std::vector& genotypeV, + const fitness_as_genes& fg, + const std::map& intName) { + + // The genotype vectors are returned as a string of names. Similar to + // the Int version, but we map here to names. + + // As the fitness is stored in terms of modules, not genes, we need to + // check if a _gene_ is in the order part or not by mapping back to + // modules. That is the fitness_as_genes argument. + + std::string strGenotype; + + std::vector order_int; + std::vector rest_int; + + for(auto const &g : genotypeV) { + if( binary_search(fg.orderG.begin(), fg.orderG.end(), g)) { + order_int.push_back(g); + } else { + rest_int.push_back(g); + } + } + + std::string order_sep = " _ "; + std::string order_part; + std::string rest; + std::string comma = ""; + + // FIXME: when sure no problems, remove at if needed for speed. + for(auto const &g : order_int) { + order_part += (comma + intName.at(g)); + comma = " > "; // comma = ", "; + } + comma = ""; + for(auto const &g : rest_int) { + rest += (comma + intName.at(g)); + comma = ", "; + } + if(fg.orderG.size()) { + strGenotype = order_part + order_sep + rest; + } else { + strGenotype = rest; + } + return strGenotype; +} + + +std::vector genotypesToNameString(const std::vector< vector >& uniqueGenotypesV, + const fitness_as_genes fg, + // const fitnessEffectsAll& F, + const std::map& intName) { + //fitness_as_genes fg = fitnessAsGenes(F); // I use this before; + std::vector gs; + for(auto const &v: uniqueGenotypesV ) + gs.push_back(genotypeToNameString(v, fg, intName)); + return gs; +} + + +// std::vector genotypesToString(const std::vector< vector >& uniqueGenotypesV, +// const fitnessEffectsAll& F, +// bool names = true) { +// fitness_as_genes fg = fitnessAsGenes(F); +// std::vector gs; + +// if(names) { +// std::map intName = mapGenesIntToNames(F); +// for(auto const &v: uniqueGenotypesV ) +// gs.push_back(genotypeToNameString(v, fg, intName)); +// } else { +// for(auto const &v: uniqueGenotypesV ) +// gs.push_back(genotypeToIntString(v, fg)); +// } + +// // exercise: do it with lambdas +// // std::transform(uniqueGenotypesV.begin(), uniqueGenotypesV.end(), +// // back_inserter(gs), vectorGenotypeToString); +// return gs; +// } + +Rcpp::IntegerMatrix nr_create_returnGenotypes(const int& numGenes, + const std::vector< vector >& uniqueGenotypesV){ + // We loose order here. Thus, there might be several identical columns. + Rcpp::IntegerMatrix returnGenotypes(numGenes, uniqueGenotypesV.size()); + for(size_t i = 0; i < uniqueGenotypesV.size(); ++i) { + for(int j : uniqueGenotypesV[i]) { + returnGenotypes(j - 1, i) = 1; + } + } + return returnGenotypes; +} + + + + + + +static void nr_sample_all_pop_P(std::vector& sp_to_remove, + std::vector& popParams, + // next only used with DEBUGV + const std::vector& Genotypes, + const double& tSample, + const int& mutationPropGrowth){ + sp_to_remove.clear(); + + for(size_t i = 0; i < popParams.size(); i++) { + STOPASSERT(popParams[i].timeLastUpdate >= 0.0); + STOPASSERT(tSample - popParams[i].timeLastUpdate >= 0.0); +#ifdef DEBUGV + Rcpp::Rcout << "\n\n ********* 5.9 ******\n " + << " Species = " << i + << "\n Genotype = "; + print_Genotype(Genotypes[i]); //genotypeSingleVector(Genotypes[i]) + // << "\n sp_id = " << genotypeSingleVector(Genotypes[i]) // sp_id[i] + Rcpp::Rcout << "\n pre-update popSize = " + << popParams[i].popSize + << "\n time of sample = " << tSample + << "\n popParams[i].timeLastUpdate = " + << popParams[i].timeLastUpdate + << ";\n t for Algo2 = " + << tSample - popParams[i].timeLastUpdate + << " \n species R " << popParams[i].R + << " \n species W " << popParams[i].W + << " \n species death " << popParams[i].death + << " \n species birth " << popParams[i].birth; +#endif + + // Account for forceSampling. When + // forceSampling, popSize for at least one species + // was updated in previous loop, so we skip that one + if(tSample > popParams[i].timeLastUpdate) { + popParams[i].popSize = + Algo2_st(popParams[i], tSample, mutationPropGrowth); + } + if( popParams[i].popSize <= 0.0 ) { + // this i has never been non-zero in any sampling time + // eh?? + // If it is 0 here, remove from _current_ population. Anything that + // has had a non-zero size at sampling time is preserved (if it + // needs to be preserved, because it is keepEvery time). + sp_to_remove.push_back(i); + +#ifdef DEBUGV + Rcpp::Rcout << "\n\n Removing species i = " << i + << " with genotype = "; + print_Genotype(Genotypes[i]); //genotypeSingleVector(Genotypes[i]); +#endif + } +#ifdef DEBUGV + Rcpp::Rcout << "\n\n post-update popSize = " + << popParams[i].popSize << "\n"; +#endif + } +} + +// zz: add population size of parent, to get the true LOD +// as in Szendro +void addToPhylog(PhylogName& phylog, + const Genotype& parent, + const Genotype& child, + const double time, + const std::map& intName, + const fitness_as_genes& fg, + const double pop_size_child) { + phylog.time.push_back(time); + phylog.parent.push_back(genotypeToNameString(genotypeSingleVector(parent), + fg, intName)); + phylog.child.push_back(genotypeToNameString(genotypeSingleVector(child), + fg, intName)); + phylog.pop_size_child.push_back(pop_size_child); +} + +// // Only called when the child has pop size of 0 +// // so true LOD +// void addToLOD(LOD& lod, +// const Genotype& parent, +// const Genotype& child, +// // const double time, +// const std::map& intName, +// const fitness_as_genes& fg) { +// // lod.time.push_back(time); +// lod.parent.push_back(genotypeToNameString(genotypeSingleVector(parent), +// fg, intName)); +// lod.child.push_back(genotypeToNameString(genotypeSingleVector(child), +// fg, intName)); +// } + + +// Only called when the child has pop size of 0 +// so true LOD +// Use a map for LOD, and overwrite the parent: +// we only add when the size of the child is 0 +// The key of the map is the child. + +// FIXME: we might want to store the time? Not really clear even if that +// makes sense. We would be storing the last time the child (which had 0 +// size at that time) arose from the parent. +// A simple kludge is to have two maps, the second with child and time. +// Or do it properly as map +// genot_time_struct {string parent; double time} + +void addToLOD(std::map& lod, + const Genotype& parent, + const Genotype& child, + // const double time, + const std::map& intName, + const fitness_as_genes& fg) { + std::string parent_str = genotypeToNameString(genotypeSingleVector(parent), + fg, intName); + std::string child_str = genotypeToNameString(genotypeSingleVector(child), + fg, intName); + lod[child_str] = parent_str; + // // lod.time.push_back(time); + // lod.parent.push_back(genotypeToNameString(genotypeSingleVector(parent), + // fg, intName)); + // lod.child.push_back(genotypeToNameString(genotypeSingleVector(child), + // fg, intName)); +} + + +void addToPOM(POM& pom, + const Genotype& genotype, + const std::map& intName, + const fitness_as_genes& fg) { + + if (pom.genotypes.empty()) { + std::string g = genotypeToNameString(genotypeSingleVector(genotype), + fg, intName); + pom.genotypesString.push_back(g); + pom.genotypes.push_back(genotype); + } else if ( !(pom.genotypes.back() == genotype) ) { + std::string g = genotypeToNameString(genotypeSingleVector(genotype), + fg, intName); + pom.genotypesString.push_back(g); + pom.genotypes.push_back(genotype); + } +} + +// to explicitly signal extinction +void addToPOM(POM& pom, + const std::string string) { + pom.genotypesString.push_back(string); +} + + + +static void nr_innerBNB (const fitnessEffectsAll& fitnessEffects, + const double& initSize, + const double& K, + // const double& alpha, + // const double& genTime, + const TypeModel typeModel, + const int& mutationPropGrowth, + const std::vector& mu, + // const double& mu, + const double& death, + const double& keepEvery, + const double& sampleEvery, + const std::vector& initMutant, + const time_t& start_time, + const double& maxWallTime, + const double& finalTime, + const double& detectionSize, + const int& detectionDrivers, + const double& minDetectDrvCloneSz, + const double& extraTime, + const int& verbosity, + double& totPopSize, + double& em1, + double& em1sc, + // double& n_1, + // double& en1, + double& ratioForce, + double& currentTime, + int& speciesFS, + int& outNS_i, + int& iter, + std::vector& genot_out, + std::vector& popSizes_out, + std::vector& index_out, + std::vector& time_out, + std::vector& sampleTotPopSize, + std::vector& sampleLargestPopSize, + std::vector& sampleMaxNDr, + std::vector& sampleNDrLargestPop, + bool& reachDetection, + std::mt19937& ran_gen, + // randutils::mt19937_rng& ran_gen, + double& runningWallTime, + bool& hittedWallTime, + const std::map& intName, + const fitness_as_genes& genesInFitness, + PhylogName& phylog, + bool keepPhylog, + const fitnessEffectsAll& muEF, + const std::vector& full2mutator, + const double& cPDetect, + const double& PDBaseline, + const double& checkSizePEvery, + const bool& AND_DrvProbExit, + const std::vector< std::vector >& fixation_l, + const double& fixation_tolerance, + const int& min_successive_fixation, + const double& fixation_min_size, + int& ti_dbl_min, + int& ti_e3, + std::map& lod, + // LOD& lod, + POM& pom) { + + double nextCheckSizeP = checkSizePEvery; + const int numGenes = fitnessEffects.genomeSize; + + double mymindummy = 1.0e-11; //1e-10 + double targetmindummy = 1.0e-10; //1e-9 + double minmu = *std::min_element(mu.begin(), mu.end()); + // Very small, but no less than mymindummy, for numerical issues. + // We can probably go down to 1e-13. 1e-16 is not good as we get lots + // of pE.f not finite. 1e-15 is probably too close, and even if no pE.f + // we can get strange behaviors. + double dummyMutationRate = std::max(std::min(minmu/1.0e4, targetmindummy), + mymindummy); + // This should very rarely happen: + if(minmu <= mymindummy) { // 1e-9 + double newdd = minmu/100.0; + Rcpp::Rcout << "WARNING: the smallest mutation rate is " + << "<= " << mymindummy << ". That is a really small value" + << "(per-base mutation rate in the human genome is" + << " ~ 1e-11 to 1e-9). " + << "Setting dummyMutationRate to your min/100 = " + << newdd + << ". There can be numerical problems later.\n"; + dummyMutationRate = newdd; + } + // double dummyMutationRate = 1e-10; + // ALWAYS initialize this here, or reinit or rezero + genot_out.clear(); + + phylog = PhylogName(); + // lod = LOD(); + lod.clear(); + pom = POM(); + + popSizes_out.clear(); + index_out.clear(); + time_out.clear(); + totPopSize = 0.0; + sampleTotPopSize.clear(); + currentTime = 0.0; + iter = 0; + + outNS_i = -1; + + sampleTotPopSize.clear(); + sampleLargestPopSize.clear(); + sampleMaxNDr.clear(); + sampleNDrLargestPop.clear(); + // end of rezeroing. + + + // } + // anyForceRerunIssues = false; + + bool forceSample = false; + bool simulsDone = false; + double lastStoredSample = 0.0; + + + double minNextMutationTime; + double mutantTimeSinceLastUpdate; + double timeNextPopSample; + double tSample; + + std::vector newMutations; + int nextMutant; + unsigned int numSpecies = 0; + int numMutablePosParent = 0; + + + unsigned int sp = 0; + //int type_resize = 0; + + int iterL = 1000; + int speciesL = 100; + //int timeL = 1000; + + int iterInterrupt = 50000; //how large should we make this? + + double tmpdouble1 = 0.0; + double tmpdouble2 = 0.0; + + std::vectorsp_to_remove(1); + sp_to_remove.reserve(10000); + + // those to update + int to_update = 1; //1: one species; 2: 2 species; 3: all. + int u_1 = -99; + int u_2 = -99; + + Genotype newGenotype; + std::vector Genotypes(1); + Genotypes[0] = wtGenotype(); //Not needed, but be explicit. + + std::vector popParams(1); + + // // FIXME debug + // Rcpp::Rcout << "\n popSize[0] at 1 "; + // print_spP(popParams[0]); + // // end debug + + + const int sp_per_period = 5000; + popParams.reserve(sp_per_period); + Genotypes.reserve(sp_per_period); + + // // FIXME debug + // Rcpp::Rcout << "\n popSize[0] at 01 "; + // print_spP(popParams[0]); + // // end debug + + + spParamsP tmpParam; + init_tmpP(tmpParam); + init_tmpP(popParams[0]); + popParams[0].popSize = initSize; + totPopSize = initSize; + + + // // FIXME debug + // Rcpp::Rcout << "\n popSize[0] at 10000 "; + // print_spP(popParams[0]); + // // end debug + + + + std::vectormutablePos(numGenes); // could be inside getMuatedPos_bitset + + // multimap to hold nextMutationTime + std::multimap mapTimes; + //std::multimap::iterator m1pos; + + // int ti_dbl_min = 0; + // int ti_e3 = 0; + ti_dbl_min = 0; + ti_e3 = 0; + + + // // FIXME debug + // Rcpp::Rcout << "\n popSize[0] at 10002 "; + // print_spP(popParams[0]); + // // end debug + + + + // // Beerenwinkel + // double adjust_fitness_B = -std::numeric_limits::infinity(); + //McFarland + double adjust_fitness_MF = -std::numeric_limits::infinity(); + + // for McFarland error + em1 = 0.0; + em1sc = 0.0; + // n_0 = 0.0; + // n_1 = 0.0; + // double tps_0; //, tps_1; + // tps_0 = totPopSize; + // tps_1 = totPopSize; + + // en1 = 0; + double totPopSize_previous = totPopSize; + double DA_previous = log1p(totPopSize_previous/K); + + // // FIXME debug + // Rcpp::Rcout << "\n popSize[0] at 10004 "; + // print_spP(popParams[0]); + // // end debug + + + + int lastMaxDr = 0; + double done_at = -9; + + + int num_successive_fixation = 0; // none so far + + +#ifdef MIN_RATIO_MUTS_NR + g_min_birth_mut_ratio_nr = DBL_MAX; + g_min_death_mut_ratio_nr = DBL_MAX; + g_tmp1_nr = DBL_MAX; +#endif + + // // FIXME debug + // Rcpp::Rcout << " popSize[0] at 1b "; + // print_spP(popParams[0]); + // // end debug + + // This long block, from here to X1, is ugly and a mess! + // This is what takes longer to figure out whenever I change + // anything. FIXME!! + if(initMutant.size() > 0) { + Genotypes[0] = createNewGenotype(wtGenotype(), + initMutant, + fitnessEffects, + ran_gen, + false); + int numGenesInitMut = Genotypes[0].orderEff.size() + + Genotypes[0].epistRtEff.size() + Genotypes[0].rest.size(); + int numGenesGenotype = fitnessEffects.allGenes.size(); + popParams[0].numMutablePos = numGenesGenotype - numGenesInitMut; + // Next two are unreachable since caught in R. + // But just in case, since it would lead to seg fault. + if(popParams[0].numMutablePos < 0) + throw std::invalid_argument("initMutant's genotype has more genes than are possible."); + if(popParams[0].numMutablePos == 0) + throw std::invalid_argument("initMutant has no mutable positions: genotype with all genes mutated."); + // popParams[0].numMutablePos = numGenes - 1; + // From obtainMutations, but initMutant an int vector. But cumbersome. + // std::vector sortedg = convertGenotypeFromInts(initMutant); + // sort(sortedg.begin(), sortedg.end()); + // std::vector nonmutated; + // set_difference(fitnessEffects.allGenes.begin(), fitnessEffects.allGenes.end(), + // sortedg.begin(), sortedg.end(), + // back_inserter(nonmutated)); + // popParams[0].numMutablePos = nonmutated.size(); + + + + // Commenting out the unused models! + // if(typeModel == TypeModel::beerenwinkel) { + + // popParams[0].death = 1.0; //note same is in McFarland. + // // But makes sense here; adjustment in beerenwinkel is via fitness + + // // initialize to prevent birth/mutation warning with Beerenwinkel + // // when no mutator. O.w., the defaults + // if(!mutationPropGrowth) + // popParams[0].mutation = mu * popParams[0].numMutablePos; + // popParams[0].absfitness = prodFitness(evalGenotypeFitness(Genotypes[0], + // fitnessEffects)); + // updateRatesBeeren(popParams, adjust_fitness_B, initSize, + // currentTime, alpha, initSize, + // mutationPropGrowth, mu); + // } else if(typeModel == TypeModel::mcfarland0) { + // // death equal to birth of a non-mutant. + // popParams[0].death = log1p(totPopSize/K); // log(2.0), except rare cases + // if(!mutationPropGrowth) + // popParams[0].mutation = mu * popParams[0].numMutablePos; + // popParams[0].absfitness = prodFitness(evalGenotypeFitness(Genotypes[0], + // fitnessEffects)); + // updateRatesMcFarland0(popParams, adjust_fitness_MF, K, + // totPopSize, + // mutationPropGrowth, mu); + // } else if(typeModel == TypeModel::mcfarland) { + // popParams[0].death = totPopSize/K; + // popParams[0].birth = prodFitness(evalGenotypeFitness(Genotypes[0], + // fitnessEffects)); + // } else if(typeModel == TypeModel::mcfarlandlog) { + + if(typeModel == TypeModel::mcfarlandlog) { + popParams[0].death = log1p(totPopSize/K); + popParams[0].birth = prodFitness(evalGenotypeFitness(Genotypes[0], + fitnessEffects)); + } else if(typeModel == TypeModel::bozic1) { + tmpParam.birth = 1.0; + tmpParam.death = -99.9; + // } else if (typeModel == TypeModel::bozic2) { + // tmpParam.birth = -99; + // tmpParam.death = -99; + } else if (typeModel == TypeModel::exp) { + tmpParam.birth = -99; + tmpParam.death = death; + } else { + // caught in R, so unreachable here + throw std::invalid_argument("this ain't a valid typeModel"); + } + // if( (typeModel != TypeModel::beerenwinkel) && (typeModel != TypeModel::mcfarland0) + // && (typeModel != TypeModel::mcfarland) && (typeModel != TypeModel::mcfarlandlog)) // wouldn't matter + // nr_fitness(popParams[0], tmpParam, + // Genotypes[0], + // fitnessEffects, + // typeModel, genTime, + // adjust_fitness_B, adjust_fitness_MF); + if( (typeModel != TypeModel::mcfarlandlog)) // wouldn't matter + nr_fitness(popParams[0], tmpParam, + Genotypes[0], + fitnessEffects, + typeModel); + // , genTime); + // adjust_fitness_B, adjust_fitness_MF); + // we pass as the parent the tmpParam; it better initialize + // everything right, or that will blow. Reset to init + init_tmpP(tmpParam); + addToPOM(pom, Genotypes[0], intName, genesInFitness); + } else { //no initMutant + popParams[0].numMutablePos = numGenes; + + // if(typeModel == TypeModel::beerenwinkel) { + // popParams[0].death = 1.0; + // // initialize to prevent birth/mutation warning with Beerenwinkel + // // when no mutator. O.w., the defaults + // if(!mutationPropGrowth) + // popParams[0].mutation = mu * popParams[0].numMutablePos; + // popParams[0].absfitness = 1.0; + // updateRatesBeeren(popParams, adjust_fitness_B, initSize, + // currentTime, alpha, initSize, + // mutationPropGrowth, mu); + // } else if(typeModel == TypeModel::mcfarland0) { + // popParams[0].death = log1p(totPopSize/K); + // if(!mutationPropGrowth) + // popParams[0].mutation = mu * popParams[0].numMutablePos; + // popParams[0].absfitness = 1.0; + // updateRatesMcFarland0(popParams, adjust_fitness_MF, K, + // totPopSize, + // mutationPropGrowth, mu); + // } else if(typeModel == TypeModel::mcfarland) { + // popParams[0].birth = 1.0; + // popParams[0].death = totPopSize/K; + // // no need to call updateRates + // } else if(typeModel == TypeModel::mcfarlandlog) { + if(typeModel == TypeModel::mcfarlandlog) { + popParams[0].birth = 1.0; + popParams[0].death = log1p(totPopSize/K); + // no need to call updateRates + } else if(typeModel == TypeModel::bozic1) { + popParams[0].birth = 1.0; + popParams[0].death = 1.0; + // } else if (typeModel == TypeModel::bozic2) { + // popParams[0].birth = 0.5/genTime; + // popParams[0].death = 0.5/genTime; + } else if (typeModel == TypeModel::exp) { + popParams[0].birth = 1.0; + popParams[0].death = death; + } else { + throw std::invalid_argument("this ain't a valid typeModel"); + } + } + + + + + + // // these lines (up to, and including, R_F_st) + // // not needed with mcfarland0 or beerenwinkel + // if(mutationPropGrowth) + // popParams[0].mutation = mu * popParams[0].birth * popParams[0].numMutablePos; + // else + // popParams[0].mutation = mu * popParams[0].numMutablePos; + + popParams[0].mutation = mutationFromScratch(mu, popParams[0], Genotypes[0], + fitnessEffects, mutationPropGrowth, + full2mutator, muEF); + W_f_st(popParams[0]); + R_f_st(popParams[0]); + + + // X1: end of mess of initialization block + + popParams[0].pv = mapTimes.insert(std::make_pair(-999, 0)); + + if( keepEvery > 0 ) { + // We keep the first ONLY if we are storing more than one. + outNS_i++; + time_out.push_back(currentTime); + + genot_out.push_back(Genotypes[0]); + popSizes_out.push_back(popParams[0].popSize); + index_out.push_back(outNS_i); + + sampleTotPopSize.push_back(popParams[0].popSize); + sampleLargestPopSize.push_back(popParams[0].popSize); + sampleMaxNDr.push_back(getGenotypeDrivers(Genotypes[0], + fitnessEffects.drv).size()); + sampleNDrLargestPop.push_back(sampleMaxNDr[0]); + } + // FIXME: why next line and not just genot_out.push_back(Genotypes[i]); + // if keepEvery > 0? We do that already. + // It is just ugly to get a 0 in that first genotype when keepEvery < 0 + // uniqueGenotypes.insert(Genotypes[0].to_ullong()); + timeNextPopSample = currentTime + sampleEvery; + numSpecies = 1; + + +#ifdef DEBUGV + Rcpp::Rcout << "\n the initial species\n"; + print_spP(popParams[0]); +#endif + + + + + + while(!simulsDone) { + // Check how we are doing with time as first thing. + runningWallTime = difftime(time(NULL), start_time); + if( runningWallTime > maxWallTime ) { + hittedWallTime = true; + forceSample = true; + simulsDone = true; + } + + iter++; + + if( !(iter % iterInterrupt)) + Rcpp::checkUserInterrupt(); + + if(verbosity > 1) { + if(! (iter % iterL) ) { + Rcpp::Rcout << "\n\n ... iteration " << iter; + Rcpp::Rcout << "\n ... currentTime " << currentTime <<"\n"; + } + if(!(numSpecies % speciesL )) { + Rcpp::Rcout << "\n\n ... iteration " << iter; + Rcpp::Rcout << "\n\n ... numSpecies " << numSpecies << "\n"; + } + } + + // ************ 5.2 *************** + if(verbosity >= 2) + Rcpp::Rcout <<"\n\n\n*** Looping through 5.2. Iter = " << iter + << ". Current time " << currentTime << " \n"; + + tSample = std::min(timeNextPopSample, finalTime); + +#ifdef DEBUGV + Rcpp::Rcout << " DEBUGV\n"; + Rcpp::Rcout << "\n ForceSample? " << forceSample + << " tSample " << tSample + << " currentTime " << currentTime; +#endif + + if(iter == 1) { // handle special case of first iter + tmpdouble1 = ti_nextTime_tmax_2_st(popParams[0], + currentTime, + tSample, + ti_dbl_min, ti_e3); + mapTimes_updateP(mapTimes, popParams, 0, tmpdouble1); + //popParams[0].Flag = false; + popParams[0].timeLastUpdate = currentTime; + } else { // any other iter + if(to_update == 1) { + // we did not sample or mutate to a different species in previous period + tmpdouble1 = ti_nextTime_tmax_2_st(popParams[u_1], + currentTime, + tSample, + ti_dbl_min, ti_e3); + mapTimes_updateP(mapTimes, popParams, u_1, tmpdouble1); + popParams[u_1].timeLastUpdate = currentTime; + +#ifdef DEBUGV + detect_ti_duplicates(mapTimes, tmpdouble1, u_1); +#endif + +#ifdef DEBUGV + Rcpp::Rcout << "\n\n ********* 5.2: call to ti_nextTime, update one ******\n For to_update = \n " + << " tSample = " << tSample + + << "\n\n** Species = " << u_1 + << "\n genotype = "; + print_Genotype(Genotypes[u_1]); + Rcpp::Rcout << "\n popSize = " << popParams[u_1].popSize + << "\n currentTime = " << currentTime + << "\n popParams[i].nextMutationTime = " + << tmpdouble1 + << " \n species R " << popParams[u_1].R + << " \n species W " << popParams[u_1].W + << " \n species death " << popParams[u_1].death + << " \n species birth " << popParams[u_1].birth; +#endif + + } else if(to_update == 2) { + // we did not sample in previous period. + tmpdouble1 = ti_nextTime_tmax_2_st(popParams[u_1], + currentTime, + tSample, ti_dbl_min, ti_e3); + mapTimes_updateP(mapTimes, popParams, u_1, tmpdouble1); + tmpdouble2 = ti_nextTime_tmax_2_st(popParams[u_2], + currentTime, + tSample, ti_dbl_min, ti_e3); + mapTimes_updateP(mapTimes, popParams, u_2, tmpdouble2); + popParams[u_1].timeLastUpdate = currentTime; + popParams[u_2].timeLastUpdate = currentTime; + +#ifdef DEBUGV + detect_ti_duplicates(mapTimes, tmpdouble1, u_1); + detect_ti_duplicates(mapTimes, tmpdouble2, u_2); +#endif + + + +#ifdef DEBUGV + Rcpp::Rcout << "\n\n ********* 5.2: call to ti_nextTime, update two ******\n " + << " tSample = " << tSample + + << "\n\n** Species = " << u_1 + << "\n genotype = "; + print_Genotype(Genotypes[u_1]); + Rcpp::Rcout << "\n popSize = " << popParams[u_1].popSize + << "\n currentTime = " << currentTime + << "\n popParams[i].nextMutationTime = " + << tmpdouble1 + << " \n species R " << popParams[u_1].R + << " \n species W " << popParams[u_1].W + << " \n species death " << popParams[u_1].death + << " \n species birth " << popParams[u_1].birth + + + << "\n\n** Species = " << u_2 + << "\n genotype = "; + print_Genotype(Genotypes[u_2]); + Rcpp::Rcout << "\n popSize = " << popParams[u_2].popSize + << "\n currentTime = " << currentTime + << "\n popParams[i].nextMutationTime = " + << tmpdouble2 + << " \n species R " << popParams[u_2].R + << " \n species W " << popParams[u_2].W + << " \n species death " << popParams[u_2].death + << " \n species birth " << popParams[u_2].birth; +#endif + + } else { // we sampled, so update all: i.e. to_update == 3 + for(size_t i = 0; i < popParams.size(); i++) { + tmpdouble1 = ti_nextTime_tmax_2_st(popParams[i], + currentTime, + tSample, ti_dbl_min, ti_e3); + mapTimes_updateP(mapTimes, popParams, i, tmpdouble1); + popParams[i].timeLastUpdate = currentTime; +#ifdef DEBUGV + detect_ti_duplicates(mapTimes, tmpdouble1, i); +#endif + +#ifdef DEBUGV + Rcpp::Rcout << "\n\n ********* 5.2: call to ti_nextTime, update all ******\n " + << " Species = " << i + << "\n genotype = "; + print_Genotype(Genotypes[i]); + Rcpp::Rcout << "\n popSize = " << popParams[i].popSize + << "\n currentTime = " << currentTime + << "\n popParams[i].nextMutationTime = " + << tmpdouble1 + << " \n species R " << popParams[i].R + << " \n species W " << popParams[i].W + << " \n species death " << popParams[i].death + << " \n species birth " << popParams[i].birth; + +#endif + } + } + } + if(forceSample) { + // A VERY ugly hack. Resetting tSample to jump to sampling. + tSample = currentTime; + // Need this, o.w. would skip a sampling. + timeNextPopSample = currentTime; + } + + + // ******************** 5.3 and do we sample? *********** + // Find minimum to know if we need to sample the whole pop + // We also obtain the nextMutant + getMinNextMutationTime4(nextMutant, minNextMutationTime, + mapTimes); + + if(verbosity >= 2) { + Rcpp::Rcout << "\n\n iteration " << iter << "; minNextMutationTime = " + << minNextMutationTime + << "; timeNextPopSample = " << timeNextPopSample + << "; popParams.size() = " << popParams.size() << "\n"; + } + + // Do we need to sample the population? + if( minNextMutationTime <= tSample ) {// We are not sampling + // ************ 5.3 ************** + currentTime = minNextMutationTime; + // ************ 5.4 *************** + mutantTimeSinceLastUpdate = currentTime - + popParams[nextMutant].timeLastUpdate; + + popParams[nextMutant].popSize = Algo3_st(popParams[nextMutant], + mutantTimeSinceLastUpdate); + + if(popParams[nextMutant].popSize > (ratioForce * detectionSize)) { + forceSample = true; + ratioForce = std::min(1.0, 2 * ratioForce); +#ifdef DEBUGV + //if(verbosity > -2) { + // We always warn about this, since interaction with ti==0 + Rcpp::Rcout << "\n Forced sampling triggered for next loop: \n " << + " popParams[nextMutant].popSize = " << + popParams[nextMutant].popSize << " > ratioForce * detectionSize \n"; + Rcpp::Rcout << " when nextMutant = " << nextMutant << + " at iteration " << iter << "\n"; + //} +#endif + } + // Check also for numSpecies, and force sampling if needed + // This is very different from the other algos, as we do not yet + // know total number of different species + // This is a protection against things going wild. Should + // not happen in regular usage. + if(! (numSpecies % speciesFS )) { + forceSample = true; + speciesFS *= 2; +#ifdef DEBUGV + //if(verbosity > -2) // we always warn about this + + Rcpp::Rcout << "\n Forced sampling triggered for next loop " + << " when numSpecies = " << + numSpecies << " at iteration " << iter << "\n"; +#endif + } + + if(popParams[nextMutant].numMutablePos != 0) { + // this is the usual case. The alternative is the dummy or null mutation + + + // ************ 5.5 *************** + + newMutations.clear(); + // FIXME: nonmutated also returned here + obtainMutations(Genotypes[nextMutant], + fitnessEffects, + numMutablePosParent, + newMutations, + ran_gen, + mu); + //DP2(newMutations); + // nr_change + // getMutatedPos_bitset(mutatedPos, numMutablePosParent, // r, + // ran_gen, + // mutablePos, + // Genotypes[nextMutant], + // numGenes); + + // ************ 5.6 *************** + newGenotype = createNewGenotype(Genotypes[nextMutant], + newMutations, + fitnessEffects, + ran_gen, + true); + // nr_change + // newGenotype = Genotypes[nextMutant]; + // newGenotype.set(mutatedPos); + // newGenotype[mutatedPos] = 1; + + // FIXME + // any speed diff between a) and b)? + // a) + new_sp_v(sp, newGenotype, Genotypes); + // b) + // sp = 0; + // sp = new_sp(newGenotype, Genotypes); + + // nr_change + // new_sp_bitset(sp, newGenotype, Genotypes); + + if(sp == numSpecies) {// New species + ++numSpecies; + init_tmpP(tmpParam); + + if(verbosity >= 2) { + Rcpp::Rcout <<"\n Creating new species " << (numSpecies - 1) + << " from species " << nextMutant; + } + +#ifdef DEBUGW + if( (currentTime - popParams[nextMutant].timeLastUpdate) < 0.0) { + DP2(currentTime); //this is set to minNextMutationTime above + DP2(minNextMutationTime); + DP2(tSample); + DP2(popParams[nextMutant].timeLastUpdate); + DP2( (currentTime - popParams[nextMutant].timeLastUpdate) ); + DP2( (currentTime < popParams[nextMutant].timeLastUpdate) ); + DP2( (currentTime == popParams[nextMutant].timeLastUpdate) ); + DP2(nextMutant); + DP2(u_1); + DP2(u_2); + DP2(tmpdouble1); + DP2(tmpdouble2); + DP2(popParams[nextMutant].timeLastUpdate); + DP2(popParams[u_1].timeLastUpdate); + DP2(popParams[u_2].timeLastUpdate); + DP2( (popParams[u_1].timeLastUpdate - popParams[u_2].timeLastUpdate) ); + DP2( (popParams[u_1].timeLastUpdate - popParams[nextMutant].timeLastUpdate) ); + DP2( (popParams[u_1].timeLastUpdate - popParams[0].timeLastUpdate) ); + print_spP(popParams[nextMutant]); + throw std::out_of_range("new species: currentTime - timeLastUpdate[sp] out of range. ***###!!!Serious bug!!!###***"); + } +#endif + tmpParam.popSize = 1; + + nr_fitness(tmpParam, popParams[nextMutant], + newGenotype, + fitnessEffects, + typeModel);// , genTime, + // adjust_fitness_B, adjust_fitness_MF); + + if(tmpParam.birth > 0.0) { + // if(keepMutationTimes) + // update_mutation_freqs(newMutation, currentTime, mutation_freq_at); + //FIXME: phylog + // if(keepPhylog) + // addToPhylog(phylog, Genotypes[nextMutant], newGenotype, currentTime, + // intName, genesInFitness); + + tmpParam.numMutablePos = numMutablePosParent - 1; + tmpParam.mutation = mutationFromScratch(mu, tmpParam, newGenotype, + fitnessEffects, + mutationPropGrowth, full2mutator, + muEF); + // tmpParam.mutation = mutationFromParent(mu, tmpParam, popParams[nextMutant], + // newMutations, mutationPropGrowth, + // newGenotype, full2mutator, + // muEF); + + + //tmpParam.mutation = mu * (numMutablePosParent - 1); + if (tmpParam.mutation > 1 ) + Rcpp::Rcout << "WARNING: mutation > 1\n"; + if (numMutablePosParent == 1) { + if(verbosity >= 1) + Rcpp::Rcout << "Note: mutation = 0; no positions left for mutation\n"; + // FIXME:varmutrate: give the value of dummy here. + tmpParam.mutation = dummyMutationRate; // dummy mutation here. Set some mu. + } + W_f_st(tmpParam); + R_f_st(tmpParam); + tmpParam.timeLastUpdate = -99999.99999; //mapTimes_updateP does what it should. + // as this is a new species + popParams.push_back(tmpParam); + Genotypes.push_back(newGenotype); + to_update = 2; +#ifdef MIN_RATIO_MUTS_NR + g_tmp1_nr = tmpParam.birth/tmpParam.mutation; + if(g_tmp1_nr < g_min_birth_mut_ratio_nr) g_min_birth_mut_ratio_nr = g_tmp1_nr; + + g_tmp1_nr = tmpParam.death/tmpParam.mutation; + if(g_tmp1_nr < g_min_death_mut_ratio_nr) g_min_death_mut_ratio_nr = g_tmp1_nr; +#endif + + // LOD: + // here first call to addToPhylog, with popSize popParams[sp].popSize + // and it is 0 + if(keepPhylog) + addToPhylog(phylog, Genotypes[nextMutant], newGenotype, currentTime, + intName, genesInFitness, 0); + // LOD, as LOD sensu stricto, always done now + addToLOD(lod, Genotypes[nextMutant], newGenotype, // currentTime, + intName, genesInFitness); + + } else {// fitness is 0, so we do not add it + --sp; + --numSpecies; + to_update = 1; + } + // #ifdef DEBUGV + if(verbosity >= 3) { + Rcpp::Rcout << " \n\n\n Looking at NEW species " << sp << " at creation"; + Rcpp::Rcout << "\n New Genotype :"; + print_Genotype(newGenotype); + Rcpp::Rcout << "\n Parent Genotype :"; + print_Genotype(Genotypes[nextMutant]); + // Rcpp::Rcout << "\n Genotype = " << genotypeSingleVector(newGenotype); //Genotypes[sp]; + //Genotypes[sp].to_ullong(); + Rcpp::Rcout << "\n birth of sp = " << tmpParam.birth; + Rcpp::Rcout << "\n death of sp = " << tmpParam.death; + // Rcpp::Rcout << "\n s = " << s; + Rcpp::Rcout << "\n parent birth = " << popParams[nextMutant].birth; + Rcpp::Rcout << "\n parent death = " << popParams[nextMutant].death; + // Rcpp::Rcout << "\n parent Genotype = " << genotypeSingleVector(Genotypes[nextMutant]); + Rcpp::Rcout << "\n\n popParams parent: \n"; + print_spP(popParams[nextMutant]); + Rcpp::Rcout << "\n\npopParams child: \n"; + print_spP(tmpParam); + } + // #endif + } else { // A mutation to pre-existing species + + // What we do here is step 6 of Algorithm 5, in the "Otherwise", + // in p. 5 of suppl mat. We will update both, and only these + // two. + to_update = 2; + +#ifdef DEBUGW + if( (currentTime - popParams[sp].timeLastUpdate) < 0.0) { + // Yes, the difference could be 0 if two next mutation times are identical. + // You enable detect_ti_duplicates and use trigger-duplicated-ti.R + // to see it. + // Often the involved culprits (nextMutant and the other, say sp) + // were lastUpdated with tiny difference and they were, when updated + // given an identical ti, each in its own run. + // Key is not timeLastUpdate. This is a possible sequence of events: + // - at time t0, species that will become nextMutant is updated and gets ti = tinm + // - t1: species u1 gets ti = tinm + // - t2: species u2 gets some ti > tinm + // - tinm becomes minimal, so we mutate u1, and it mutates to u2 + // - (so now the timeLastUpdate of u1 = u2 = tinm) + // - nextMutant is now mutated, and it mutates to u2, which becomes sp + // - tinm = timeLastUpdate of u1 and u2. + // - You will also see that number of mutations, or genotypes are such + // that, in this case, u2 is the most mutated, etc. + // - If you enable the detect_ti_duplicates, you would have seen duplicated ti + // for nextMutant and u1 + + // Even simpler is if above, nextMutant will mutate to u1 (not u2) so u1 becomes sp. + DP2(currentTime); //this is set to minNextMutationTime above + DP2(minNextMutationTime); + DP2(tSample); + DP2(popParams[sp].timeLastUpdate); + DP2( (currentTime - popParams[sp].timeLastUpdate) ); + DP2( (currentTime < popParams[sp].timeLastUpdate) ); + DP2( (currentTime == popParams[sp].timeLastUpdate) ); + DP2(sp); + DP2(nextMutant); + DP2(u_1); + DP2(u_2); + DP2(tmpdouble1); + DP2(tmpdouble2); + DP2(popParams[sp].timeLastUpdate); + DP2(popParams[nextMutant].timeLastUpdate); + DP2(popParams[u_1].timeLastUpdate); + DP2(popParams[u_2].timeLastUpdate); + DP2( (popParams[u_1].timeLastUpdate - popParams[u_2].timeLastUpdate) ); + DP2( (popParams[u_1].timeLastUpdate - popParams[nextMutant].timeLastUpdate) ); + DP2( (popParams[u_1].timeLastUpdate - popParams[0].timeLastUpdate) ); + print_spP(popParams[sp]); + print_spP(popParams[nextMutant]); + throw std::out_of_range("currentTime - timeLastUpdate[sp] out of range. ***###!!!Serious bug!!!###***"); + } + if( (currentTime - popParams[nextMutant].timeLastUpdate) < 0.0) { + DP2(currentTime); //this is set to minNextMutationTime above + DP2(minNextMutationTime); + DP2(tSample); + DP2(popParams[nextMutant].timeLastUpdate); + DP2( (currentTime - popParams[nextMutant].timeLastUpdate) ); + DP2( (currentTime < popParams[nextMutant].timeLastUpdate) ); + DP2( (currentTime == popParams[nextMutant].timeLastUpdate) ); + DP2(sp); + DP2(nextMutant); + DP2(u_1); + DP2(u_2); + DP2(tmpdouble1); + DP2(tmpdouble2); + DP2(popParams[sp].timeLastUpdate); + DP2(popParams[nextMutant].timeLastUpdate); + DP2(popParams[u_1].timeLastUpdate); + DP2(popParams[u_2].timeLastUpdate); + DP2( (popParams[u_1].timeLastUpdate - popParams[u_2].timeLastUpdate) ); + DP2( (popParams[u_1].timeLastUpdate - popParams[nextMutant].timeLastUpdate) ); + DP2( (popParams[u_1].timeLastUpdate - popParams[0].timeLastUpdate) ); + print_spP(popParams[sp]); + print_spP(popParams[nextMutant]); + throw std::out_of_range("currentTime - timeLastUpdate[nextMutant] out of range. ***###!!!Serious bug!!!###***"); + } +#endif + // if(verbosity >= 2) { +#ifdef DEBUGV + Rcpp::Rcout <<"\n Mutated to existing species " << sp + << " (Genotype = "; + print_Genotype(Genotypes[sp]); + // << "; sp_id = " << Genotypes[sp].to_ullong() + Rcpp::Rcout << ")" + << "\n from species " << nextMutant + << " (Genotypes = "; + print_Genotype(Genotypes[nextMutant]); + // << "; sp_id = " << Genotypes[sp].to_ullong() + Rcpp::Rcout << ")"; + // } +#endif + // FIXME00: the if can be removed?? + // Possibly. But note that the popParams[sp].popSize can be > + // 0, but when updated via Algo2 and added to 1.0 we can end + // in 1. Why? Because Algo2 can return a 0. The species + // "exist" in the sense that it had non-zero pop size when we + // last sampled/updated it. + + // What we do here is step 6 of Algorithm 5, in the + // "Otherwise", in p. 5 of suppl mat. + + if(popParams[sp].popSize > 0.0) { + popParams[sp].popSize = 1.0 + + Algo2_st(popParams[sp], currentTime, mutationPropGrowth); + if(verbosity >= 2) { + Rcpp::Rcout << "\n New popSize = " << popParams[sp].popSize << "\n"; + } + } else { + throw std::range_error("\n popSize == 0 but existing? \n"); + } +#ifdef DEBUGW + // This is wrong!!! if we set it to -999999, then the time to + // next mutation will not be properly updated. In fact, the + // mapTimes map becomes a mess because the former pv in the + // popParams is not removed so we end up inserting another pair + // for the same species. + // popParams[sp].timeLastUpdate = -99999.99999; // to catch errors +#endif + //popParams[sp].Flag = true; + + //zz: LOD: + // here one of the calls to addToPhylog, with popSize popParams[sp].popSize + if(keepPhylog) + addToPhylog(phylog, Genotypes[nextMutant], newGenotype, currentTime, + intName, genesInFitness, popParams[sp].popSize); + + + } + // *************** 5.7 *************** + // u_2 irrelevant if to_update = 1; + u_1 = nextMutant; + u_2 = static_cast(sp); + } else { // the null or dummy mutation case + // We increase size by 1, as we already called Algo3. And then + // update the ti. + ++popParams[nextMutant].popSize; + to_update = 1; + u_1 = nextMutant; + u_2 = -99; + if(verbosity >= 1) + Rcpp::Rcout << "Note: updating in null mutation\n"; + } + } else { // *********** We are sampling ********** + to_update = 3; //short_update = false; + if(verbosity >= 2) { + Rcpp::Rcout <<"\n We are SAMPLING"; + if(tSample < finalTime) { + Rcpp::Rcout << " at time " << tSample << "\n"; + } else + Rcpp::Rcout <<". We reached finalTime " << finalTime << "\n"; + } + + currentTime = tSample; + if(verbosity >= 3) + Rcpp::Rcout << "\n popParams.size() before sampling " << popParams.size() << "\n"; + + nr_sample_all_pop_P(sp_to_remove, + popParams, Genotypes, tSample, + mutationPropGrowth); + timeNextPopSample += sampleEvery; + // When we call nr_totPopSize ... species that existed between + // their creation and sampling time are never reflected. That is OK. + // This is on purpose, but if you track the phylogeny, you might see + // in the phylogeny things that never get reflected in the pops.by.time + // object. + if(sp_to_remove.size()) + remove_zero_sp_nr(sp_to_remove, Genotypes, popParams, mapTimes); + + numSpecies = popParams.size(); + + nr_totPopSize_and_fill_out_crude_P(outNS_i, totPopSize, + lastStoredSample, + genot_out, + //sp_id_out, + popSizes_out, index_out, + time_out, + sampleTotPopSize,sampleLargestPopSize, + sampleMaxNDr, sampleNDrLargestPop, + simulsDone, + reachDetection, + lastMaxDr, + done_at, + Genotypes, popParams, + currentTime, + keepEvery, + detectionSize, + finalTime, + //endTimeEvery, + detectionDrivers, + verbosity, + minDetectDrvCloneSz, + extraTime, + fitnessEffects.drv, + cPDetect, + PDBaseline, + checkSizePEvery, + nextCheckSizeP, + ran_gen, + AND_DrvProbExit, + fixation_l, + fixation_tolerance, + min_successive_fixation, + fixation_min_size, + num_successive_fixation, + pom, intName, + genesInFitness); //keepEvery is for thinning + if(verbosity >= 3) { + Rcpp::Rcout << "\n popParams.size() before sampling " << popParams.size() + << "\n totPopSize after sampling " << totPopSize << "\n"; + } + + // computeMcFarlandError(e1, n_0, tps_0, + // typeModel, totPopSize, K); //, initSize); + computeMcFarlandError_new(em1, em1sc, totPopSize_previous, DA_previous, + typeModel, totPopSize, K); + + if(simulsDone) + break; //skip last updateRates + + // if( (typeModel == TypeModel::beerenwinkel) ) { + // updateRatesBeeren(popParams, adjust_fitness_B, + // initSize, currentTime, alpha, totPopSize, + // mutationPropGrowth, mu); + // } else if( (typeModel == TypeModel::mcfarland0) ) { + // updateRatesMcFarland0(popParams, adjust_fitness_MF, + // K, totPopSize, + // mutationPropGrowth, mu); + // } else if( (typeModel == TypeModel::mcfarland) ) { + // updateRatesMcFarland(popParams, adjust_fitness_MF, + // K, totPopSize); + // } else if( (typeModel == TypeModel::mcfarlandlog) ) { + if( (typeModel == TypeModel::mcfarlandlog) ) { + updateRatesMcFarlandLog(popParams, adjust_fitness_MF, + K, totPopSize); + } + +#ifdef MIN_RATIO_MUTS_NR + // could go inside sample_all_pop but here we are sure death, etc, current + // But I catch them when they are created. Is this really needed? + for(size_t i = 0; i < popParams.size(); i++) { + g_tmp1_nr = popParams[i].birth/popParams[i].mutation; + if(g_tmp1_nr < g_min_birth_mut_ratio_nr) g_min_birth_mut_ratio_nr = g_tmp1_nr; + + g_tmp1_nr = popParams[i].death/popParams[i].mutation; + if(g_tmp1_nr < g_min_death_mut_ratio_nr) g_min_death_mut_ratio_nr = g_tmp1_nr; + } +#endif + + forceSample = false; + } + } +} + + + +// [[Rcpp::export]] +Rcpp::List nr_BNB_Algo5(Rcpp::List rFE, + Rcpp::NumericVector mu_, + double death, + double initSize, + double sampleEvery, + double detectionSize, + double finalTime, + int initSp, + int initIt, + double seed, + int verbosity, + int speciesFS, + double ratioForce, + Rcpp::CharacterVector typeFitness_, + int maxram, + int mutationPropGrowth, + Rcpp::IntegerVector initMutant_, + double maxWallTime, + double keepEvery, + double K, + int detectionDrivers, + bool onlyCancer, + bool errorHitWallTime, + int maxNumTries, + bool errorHitMaxTries, + double minDetectDrvCloneSz, + double extraTime, + bool keepPhylog, + Rcpp::List MMUEF, + Rcpp::IntegerVector full2mutator_, + double n2, + double p2, + double PDBaseline, + double cPDetect_i, + double checkSizePEvery, + bool AND_DrvProbExit, + Rcpp::List fixation_i) { + // double cPDetect){ + // double n2, + // double p2, + // double PDBaseline) { + + + precissionLoss(); + const std::vector mu = Rcpp::as >(mu_); + const std::vector initMutant = Rcpp::as >(initMutant_); + const TypeModel typeModel = stringToModel(Rcpp::as(typeFitness_)); + + // A simple, vector-indexed way to map from numeric ids in full to + // numeric ids in mutator. Recall all genes start with 1. So full2mutator[i-1]; + const std::vector full2mutator = Rcpp::as >(full2mutator_); + // A consistency check + + // const double genTime = 4.0; // should be a parameter. For Bozic only. + + //If seed is -9, then use automatic seed. + + + // Code when using randutils + // randutils::mt19937_rng ran_gen; + // if(seed == 0) + // ran_gen.seed(); + // else { + // ran_gen.seed(static_cast(seed)); + // // The next does not solve the differences between clang and gcc. So + // // keep it simple. + // // std::seed_seq s1{static_cast(seed)}; + // // ran_gen.seed(s1); + // } + + unsigned int rseed = static_cast(seed); + if(seed == 0) { + rseed = std::random_device{}(); + } + std::mt19937 ran_gen(rseed); + + double cPDetect = cPDetect_i; + if( (n2 > 0) && (p2 > 0) ) { + if (PDBaseline <= 0) throw std::range_error("PDBaseline <= 0"); + cPDetect = set_cPDetect(n2, p2, PDBaseline); + if(verbosity >= 1) + Rcpp::Rcout << " cPDetect set at " << cPDetect << "\n"; + } + + if( (K < 1 ) && ( typeModel == TypeModel::mcfarlandlog) ) + throw std::range_error("K < 1."); + fitnessEffectsAll fitnessEffects = convertFitnessEffects(rFE); + //Used at least twice + std::map intName = mapGenesIntToNames(fitnessEffects); + fitness_as_genes genesInFitness = fitnessAsGenes(fitnessEffects); + PhylogName phylog; + // LOD lod; + std::map lod; + POM pom; + + // Mutator effects + fitnessEffectsAll muEF; + if( (full2mutator.size() != 0) ) + muEF = convertFitnessEffects(MMUEF); + else + muEF = nullFitnessEffects(); + // Paranoia. We should never end up here. + if( (full2mutator.size() != 0) && (muEF.genomeSize == 0)) + throw std::logic_error("full2mutator > 0 with mutatorEffects.genomesize 0"); + if( (full2mutator.size() == 0) && (muEF.genomeSize != 0)) { + throw std::logic_error("full2mutator 0 with mutatorEffects.genomesize != 0"); + } + + // fixation: run until some genotype combinations fixed + + double fixation_tolerance = -9; + int min_successive_fixation = 100; + double fixation_min_size = 0.0; + std::vector < std::vector > fixation_l; + + if( fixation_i.size() != 0 ) { + Rcpp::List fggl = fixation_i["fixation_list"] ; + fixation_l = list_to_vector_of_int_vectors(fggl); // FIXME + fixation_tolerance = Rcpp::as(fixation_i["fixation_tolerance"]); + min_successive_fixation = Rcpp::as(fixation_i["min_successive_fixation"]); + fixation_min_size = Rcpp::as(fixation_i["fixation_min_size"]); + } else { + fixation_l.resize(0); // explicit + } + + + bool runAgain = true; + bool reachDetection = false; + //Output + std::vector genot_out; + std::vector popSizes_out; + std::vector index_out; + std::vector time_out; //only one entry per period! + genot_out.reserve(initSp); + popSizes_out.reserve(initSp); + index_out.reserve(initSp); + time_out.reserve(initIt); + + double totPopSize = 0; + std::vector sampleTotPopSize; + std::vector sampleLargestPopSize; + std::vector sampleMaxNDr; //The largest number of drivers in any + //genotype or clone at each time sample + std::vector sampleNDrLargestPop; //Number of drivers in the clone + // with largest size (at each time + // sample) + sampleTotPopSize.reserve(initIt); + sampleLargestPopSize.reserve(initIt); + sampleMaxNDr.reserve(initIt); + sampleNDrLargestPop.reserve(initIt); + + int outNS_i = -1; // the column in the outNS + time_t start_time = time(NULL); + double runningWallTime = 0; + bool hittedWallTime = false; + bool hittedMaxTries = false; + + // spParamsP tmpParam; + // std::vector popParams(1); + // const int sp_per_period = 5000; + + // popParams.reserve(sp_per_period); + // Genotypes.reserve(sp_per_period); + + // std::vectormutablePos(numGenes); // could be inside getMuatedPos_bitset + + + // // multimap to hold nextMutationTime + // std::multimap mapTimes; + // //std::multimap::iterator m1pos; + + + // // count troublesome tis + // int ti_dbl_min = 0; + // int ti_e3 = 0; + + + + // // Beerenwinkel + // double adjust_fitness_B = -std::numeric_limits::infinity(); + // //McFarland + // double adjust_fitness_MF = -std::numeric_limits::infinity(); + + // double e1, n_0; //n_1; // for McFarland error + // double tps_0, tps_1; // for McFarland error + // tps_0 = 0.0; + // tps_1 = 0.0; + // e1 = 0.0; + // n_0 = 0.0; + // n_1 = 0.0; + + double em1, em1sc; // new computation of McFarland error + em1 = 0.0; + em1sc = 0.0; + + + // // For totPopSize_and_fill and bailing out + // // should be static vars inside funct, + // // but they keep value over calls in same R session. + // int lastMaxDr = 0; + // double done_at = -9; + // // totalPopSize at time t, at t-1 and the max error. + + // 5.1 Initialize + + int numRuns = 0; + int numRecoverExcept = 0; + bool forceRerun = false; + + double currentTime = 0; + int iter = 0; + + int ti_dbl_min = 0; + int ti_e3 = 0; + + int accum_ti_dbl_min = 0; + int accum_ti_e3 = 0; + // bool AND_DrvProbExit = ( (cpDetect >= 0) && + // (detectionDrivers < 1e9) && + // (detectionSize < std::numeric_limits::infinity())); + while(runAgain) { + + if(numRuns >= maxNumTries) { + // hittedMaxTries This we want here to avoid an extra run and + // confusing output + hittedMaxTries = true; + Rcpp::Rcout << "\n Hitted maxtries. Exiting."; + runAgain = false; + if(errorHitMaxTries) { + Rcpp::Rcout << "\n Hitting max tries is regarded as an error. \n"; + return + List::create(Named("HittedWallTime") = false, + Named("HittedMaxTries") = true, + Named("other") = + List::create(Named("UnrecoverExcept") = false)); + } + break; + } + + try { + Rcpp::checkUserInterrupt(); + + // it is CRUCIAL that several entries are zeroed (or -1) at the + // start of innerBNB now that we do multiple runs if onlyCancer = true. + + nr_innerBNB( + fitnessEffects, + initSize, + K, + // alpha, + // genTime, + typeModel, + mutationPropGrowth, + mu, + death, + keepEvery, + sampleEvery, + initMutant, + start_time, + maxWallTime, + finalTime, + detectionSize, + detectionDrivers, + minDetectDrvCloneSz, + extraTime, + verbosity, + totPopSize, + em1, + em1sc, + // n_0, + // // n_1, + // en1, + ratioForce, + currentTime, + speciesFS, + outNS_i, + iter, + genot_out, + popSizes_out, + index_out, + time_out, + sampleTotPopSize, + sampleLargestPopSize, + sampleMaxNDr, + sampleNDrLargestPop, + reachDetection, + ran_gen, + runningWallTime, + hittedWallTime, + intName, + genesInFitness, + phylog, + keepPhylog, + muEF, + full2mutator, + cPDetect, + PDBaseline, + checkSizePEvery, + AND_DrvProbExit, + fixation_l, + fixation_tolerance, + min_successive_fixation, + fixation_min_size, + ti_dbl_min, + ti_e3, + lod, + pom); + ++numRuns; + forceRerun = false; + accum_ti_dbl_min += ti_dbl_min; + accum_ti_e3 += ti_e3; + } catch (rerunExcept &e) { + Rcpp::Rcout << "\n Recoverable exception " << e.what() + << ". Rerunning."; + forceRerun = true; + ++numRecoverExcept; + ++numRuns; // exception should count here! + accum_ti_dbl_min += ti_dbl_min; + accum_ti_e3 += ti_e3; + } catch (const std::exception &e) { + Rcpp::Rcout << "\n Unrecoverable exception: " << e.what() + << ". Aborting. \n"; + return + List::create(Named("other") = + List::create(Named("UnrecoverExcept") = true, + Named("ExceptionMessage") = e.what())); + } catch (...) { + Rcpp::Rcout << "\n Unknown unrecoverable exception. Aborting." + << "(User interrupts also generate this).\n"; + return + List::create(Named("other") = + List::create(Named("UnrecoverExcept") = true, + Named("ExceptionMessage") = "Unknown exception")); + } + if(hittedWallTime) { + Rcpp::Rcout << "\n Hitted wall time. Exiting."; + runAgain = false; + if(errorHitWallTime) { + Rcpp::Rcout << "\n Hitting wall time is regarded as an error. \n"; + return + List::create(Named("HittedWallTime") = true, + Named("HittedMaxTries") = false, // yes, for + // coherent return + // objects + Named("other") = + List::create(Named("UnrecoverExcept") = false)); + } + // } else if(numRuns > maxNumTries) { + // // hittedMaxTries FIXME this is very, very confusing in limit + // // cases. suppose maxNumTries = 1. We will run two times, and the + // // second might have reached cancer, but we will bail out here, as + // // numRuns is actually 2. However, we report the value. And we run + // // once more than needed. + // hittedMaxTries = true; + // Rcpp::Rcout << "\n Hitted maxtries. Exiting."; + // runAgain = false; + // if(errorHitMaxTries) { + // Rcpp::Rcout << "\n Hitting max tries is regarded as an error. \n"; + // return + // List::create(Named("HittedWallTime") = false, + // Named("HittedMaxTries") = true, + // Named("other") = + // List::create(Named("UnrecoverExcept") = false)); + // } + } else if(forceRerun) { + runAgain = true; + forceRerun = false; + } else { + if(onlyCancer) { + runAgain = !reachDetection; + } else { + runAgain = false; + } + } +#ifdef DEBUGV + Rcpp::Rcout << "\n reachDetection = " << reachDetection; + Rcpp::Rcout << "\n forceRerun = " << forceRerun << "\n"; + +#endif + + } // runAgain loop + + + std::vector > genot_out_v = genot_to_vectorg(genot_out); + std::vector > uniqueGenotypes_vector_nr = + uniqueGenot_vector(genot_out_v); + IntegerMatrix returnGenotypes = + nr_create_returnGenotypes(fitnessEffects.genomeSize, + uniqueGenotypes_vector_nr); + Rcpp::NumericMatrix outNS = create_outNS(uniqueGenotypes_vector_nr, + genot_out_v, + popSizes_out, + index_out, time_out, + outNS_i, maxram); + + int maxNumDrivers = 0; + int totalPresentDrivers = 0; + std::vector countByDriver(fitnessEffects.drv.size(), 0); + std::vector presentDrivers; + driverCounts(maxNumDrivers, totalPresentDrivers, + countByDriver, presentDrivers, + returnGenotypes, fitnessEffects.drv); + + + std::vector genotypesAsStrings = + genotypesToNameString(uniqueGenotypes_vector_nr, genesInFitness, intName); + std::string driversAsString = + driversToNameString(presentDrivers, intName); + + // // // zz: debugging + // // // Correct too + // DP1("intName"); + // for(auto mmm: intName) { + // Rcpp::Rcout << mmm.first << " :" ; + // Rcpp::Rcout << mmm.second << std::endl; + // } + + + // // wrong + // DP1("genotypesAsStrings"); + // for(auto gas: genotypesAsStrings) { + // Rcpp::Rcout << gas; + // Rcpp::Rcout << std::endl; + // } + + + std::vector sampleLargestPopProp(outNS_i + 1); + if((outNS_i + 1) != static_cast(sampleLargestPopSize.size())) + throw std::length_error("outNS_i + 1 != sampleLargestPopSize.size"); + std::transform(sampleLargestPopSize.begin(), sampleLargestPopSize.end(), + sampleTotPopSize.begin(), + sampleLargestPopProp.begin(), + std::divides()); + NumericMatrix perSampleStats(outNS_i + 1, 5); + fill_SStats(perSampleStats, sampleTotPopSize, sampleLargestPopSize, + sampleLargestPopProp, sampleMaxNDr, sampleNDrLargestPop); + + // create the lod return pieces. Move to a function later + std::vector lod_parent; + std::vector lod_child; + for (const auto &l : lod) { + lod_child.push_back(l.first); + lod_parent.push_back(l.second); + } + + return + List::create(Named("pops.by.time") = outNS, + Named("NumClones") = uniqueGenotypes_vector_nr.size(), + Named("TotalPopSize") = totPopSize, + Named("Genotypes") = returnGenotypes, + Named("GenotypesWDistinctOrderEff") = Rcpp::wrap(uniqueGenotypes_vector_nr), + Named("GenotypesLabels") = Rcpp::wrap(genotypesAsStrings), + Named("MaxNumDrivers") = maxNumDrivers, + Named("MaxDriversLast") = sampleMaxNDr[outNS_i], + Named("NumDriversLargestPop") = sampleNDrLargestPop[outNS_i], + Named("LargestClone") = sampleLargestPopSize[outNS_i], + Named("PropLargestPopLast") = sampleLargestPopProp[outNS_i], + Named("FinalTime") = currentTime, + Named("NumIter") = iter, + Named("HittedWallTime") = hittedWallTime, + Named("HittedMaxTries") = hittedMaxTries, + Named("TotalPresentDrivers") = totalPresentDrivers, + Named("CountByDriver") = countByDriver, + Named("OccurringDrivers") = driversAsString, + Named("PerSampleStats") = perSampleStats, + Named("other") = List::create(Named("attemptsUsed") = numRuns, + Named("errorMF") = + returnMFE_new(em1sc, typeModel), + Named("errorMF_size") = + returnMFE_new(em1, typeModel), // Used to be e1, not log + // Named("errorMF_n_0") = n_0, +#ifdef MIN_RATIO_MUTS_NR + Named("minDMratio") = + g_min_death_mut_ratio_nr, + Named("minBMratio") = + g_min_birth_mut_ratio_nr, +#else + Named("minDMratio") = -99, + Named("minBMratio") = -99, +#endif + // Named("errorMF_n_1") = n_1, + Named("PhylogDF") = DataFrame::create( + Named("parent") = phylog.parent, + Named("child") = phylog.child, + Named("time") = phylog.time, + Named("pop_size_child") = phylog.pop_size_child + ), + Named("UnrecoverExcept") = false, + Named("numRecoverExcept") = numRecoverExcept, + Named("accum_ti_dbl_min") = accum_ti_dbl_min, + Named("accum_ti_e3") = accum_ti_e3, + Named("LOD_DF") = DataFrame::create( + Named("parent") = lod_parent, // lod.parent, + Named("child") = lod_child //lod.child + ), + Named("POM") = Rcpp::wrap(pom.genotypesString) + ) + ); +} + + +// Creating return object: + + +// The 0, 1 representation is how most of the work is done in R: do I want +// to change that? + + +// Order: beware of two things: order is important for the "true" +// genotypes, but is not immediately observable. So for 0,1 +// representation, not needed or used. Thus, maybe I want two +// representations. + +// Yes, the full Genotye structure is only used when assigning fitness. So +// could we use a collapsed one with: order + rest? Nope, as whenever I'd +// create a child from a genotype, I'd need like deconvolve, and go back +// to the three piece structure. This seems much more expensive than the +// overloaded == and the usage of the overloaded < (this is only used at +// the end, when producing the output objects) diff --git a/OncoSimulR/src-i386/BNB_v1.cpp b/OncoSimulR/src-i386/BNB_v1.cpp new file mode 100644 index 00000000..0f57df2d --- /dev/null +++ b/OncoSimulR/src-i386/BNB_v1.cpp @@ -0,0 +1,2313 @@ +// Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + + +#include "debug_common.h" +#include "common_classes.h" +#include "bnb_common.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace Rcpp; +using std::vector; + + +// To track if mutation is really much smaller than birth/death +#define MIN_RATIO_MUTS +#ifdef MIN_RATIO_MUTS +// There is really no need for these to be globals? +// Unless I wanted to use them inside some function. So leave as globals. +double g_min_birth_mut_ratio = DBL_MAX; +double g_min_death_mut_ratio = DBL_MAX; +double g_tmp1 = DBL_MAX; +#endif + + +typedef std::bitset<64> Genotype64; + + +// Format of restrictTable +// - mutations in columns +// - first row, the number +// - second row, the number of dependencies +// - rest of rows, the id of the dependency +// - past number of dependencies: a -9 +// In fact, the first row is redundant. Leave it, just in case. + + +// Genotypes: the first (or column 0) genotype is the all ceros. +// would not be needed, but makes Algo5 a lot simpler. + +// But mutatedPos start at 0. +// Will need to add 1 when plotting and analyzing with R. + +// Ojo: typeCBN is going to be an int. +// But from R we pass a string, and that determined the integer. + + +static void fitness(spParamsP& tmpP, + const spParamsP& parentP, + const int& mutatedPos, + Rcpp::IntegerMatrix restrictTable, + const std::string& typeCBN, + const Genotype64& newGenotype, + // const double& birthRate, + const double& s, + // const double& death, + const int& numDrivers, + const std::string& typeFitness, + // const double& genTime, + // const double& adjust_fitness_B, + const double& sh){ + //const double& adjust_fitness_MF) { + + using namespace Rcpp; + // Two pieces: split into two functions?? + // - checking restrictions + // - returning actual fitness according + + + int numDependencies; + int sumDriversMet = 0; + int sumDriversNoMet = 0; + int sumDependenciesMet = 0; + + // set appropriate defaults. Change only needed stuff. + tmpP.birth = parentP.birth; + tmpP.death = parentP.death; + tmpP.absfitness = parentP.absfitness; + + + + + + // **** Are driver constraints met? *** + + + // Two cases: same s, sh, sp or different ones. If same, return three + // integers: sumDriversMet, sumDriversNoMet, sumPassengers. If + // different, return three vectors, filled with the non-zero + // entries. These vectors then are combined as dictated by the fintness + // functions. + + // If same single s, sh, sp: function takes three integers. O.w. it + // takes three integer vectors. + + + + if(mutatedPos >= numDrivers) { //the new mutation is a passenger + return; + } else { + for(int m = 0; m < numDrivers; ++m) { + if( newGenotype[m] ) { // this m is mutated + const Rcpp::IntegerMatrix::Column thisRestrict = + restrictTable(_, m); + numDependencies = thisRestrict[1]; + if(!numDependencies) { // this driver has no dependencies + sumDriversMet++; +#ifdef DEBUGZ + Rcpp::Rcout << "\n No dependencies: "; + DP2(sumDriversMet); +#endif + + } + else { + sumDependenciesMet = 0; + for(int i = 2; i < (2 + numDependencies); i++) { + sumDependenciesMet += newGenotype[ thisRestrict[i] ]; + } + if( ( (typeCBN == "Multiple") && (sumDependenciesMet) ) || + ( (typeCBN == "CBN") && (sumDependenciesMet == numDependencies) )) { + sumDriversMet++; + } else { + sumDriversNoMet++; + } + } + } + } + } + +#ifdef DEBUGZ + DP2(sumDriversMet); + DP2(sumDriversNoMet); + DP2(sh); + DP2(typeFitness); +#endif + + // if sh < 0 : we do not allow any unment dependencies. + // if sh = 0: no penalty for unmet dependencies + + + // FIXME: why not just pass the birth and death rates, and combine them + // in arbitrary ways? Might even allow to pass on death and birth rates + // from R. Only need care when any are density dependent. + + + // Beware: doing it this way with Bozic1 is kind of questionable because + // if birth = 1, there is no immediate extinction. In fact, there never + // is. + if((sh < 0) && sumDriversNoMet) { + tmpP.absfitness = 0.0; + tmpP.death = 1.0; + tmpP.birth = 0.0; // this is what really matters so that + // the pop does not get added. + // Line with comment "fitness is 0" + } else { + if(typeFitness == "bozic1") { + tmpP.death = pow( 1.0 - s, sumDriversMet) * + pow( 1.0 + sh, sumDriversNoMet); + tmpP.birth = 1.0; + // } else if (typeFitness == "bozic2") { + // double pp = pow( 1.0 - s, sumDriversMet) * + // pow( 1.0 + sh, sumDriversNoMet); + // tmpP.birth = (1.0/genTime) * (1.0 - 0.5 * pp ); + // tmpP.death = (0.5/genTime) * pp; + // } else if(typeFitness == "beerenwinkel") { + // // like Datta et al., 2013 + // tmpP.absfitness = pow(1.0 + s, sumDriversMet) * + // pow( 1.0 - sh, sumDriversNoMet); + // tmpP.birth = adjust_fitness_B * tmpP.absfitness; + // } else if(typeFitness == "mcfarland0") { + // tmpP.absfitness = pow(1.0 + s, sumDriversMet) / + // pow( 1.0 + sh, sumDriversNoMet); + // tmpP.birth = adjust_fitness_MF * tmpP.absfitness; + // } else if(typeFitness == "mcfarland") { + // tmpP.birth = pow(1.0 + s, sumDriversMet) / + // pow( 1.0 + sh, sumDriversNoMet); + } else if(typeFitness == "mcfarlandlog") { + tmpP.birth = pow(1.0 + s, sumDriversMet) / + pow( 1.0 + sh, sumDriversNoMet); + } else if (typeFitness == "exp") { + // Also like Datta et al., 2013 An additional driver gene mutation + // increases a cell’s fitness by a factor of (1+sd), whereas an + // additional housekeeper gene mutation decreases fitness by a + // factor of (1-sh) and the effect of multiple mutations is + // multiplicative + tmpP.birth = pow(1.0 + s, sumDriversMet) * + pow( 1.0 - sh, sumDriversNoMet); + +#ifdef DEBUGZ + double posi = pow(1.0 + s, sumDriversMet); + double negi = pow( 1.0 - sh, sumDriversNoMet); + DP2(posi); + DP2(negi); +#endif + + } // else if (typeFitness == "log") { + // tmpP.birth = birthRate+ s * log1p(sumDriversMet) - + // sh * log(1 + sumDriversNoMet); + // } else { // linear + // tmpP.birth = birthRate + s * static_cast(sumDriversMet) - + // sh * static_cast(sumDriversNoMet); + // } + } +} +// Notice: if restriction is 3 -> 4 -> 5 +// and one has 5 and 4, only 4 is unmet. Beware of that. +// So we talk about the immediate dependency or restriction. +// Not the whole transitive closure. + +// When birth == 0, popSize should become 0 immediately. +// No evaluation through random numbers, etc. +// This is how we do it. + +// How small can they get? +// d1 <- function(s, mut) { (1 - s)^mut} +// d2 <- function(s, mut) { (0.5/4) * ((1 - s)^mut) } + + +// limited benchmarks suggest the following is slower +// static inline void new_sp_bitset2(unsigned int& sp, const Genotype64& newGenotype, +// const std::vector& Genotypes) { +// sp = std::distance(Genotypes.begin(), +// std::find(Genotypes.begin(), +// Genotypes.end(), newGenotype)); +// } + + +static inline void new_sp_bitset(unsigned int& sp, const Genotype64& newGenotype, + const std::vector& Genotypes) { + sp = 0; + + for(sp = 0; sp < Genotypes.size(); ++sp) { + if( newGenotype == Genotypes[sp] ) + break; + } +} + + + +static void getMutatedPos_bitset(int& mutatedPos, int& numMutablePosParent, + //gsl_rng *r, + std::mt19937& ran_generator, + std::vector& mutablePos, + const Genotype64& nextMutantGenotype, + // const int& nextMutant, + // const std::vector& Genotypes, + const int& numGenes) { + // We want mutatedPos and numMutablePosParent + + // Note: impossible to have a second recorded mutation in + // the same gene. + + // Remember numMutablePosParent is the number of mutable positions in + // the parent! so after mutation is one less, but we do not decrease it + // here. + + numMutablePosParent = 0; + for(int i = 0; i < numGenes; ++i) { + if( !nextMutantGenotype.test(i) ) { + mutablePos[numMutablePosParent] = i; + ++numMutablePosParent; + } + } + + if(numMutablePosParent > 1) { + std::uniform_int_distribution unif(0, numMutablePosParent - 1); + mutatedPos = mutablePos[unif(ran_generator)]; + } else { + mutatedPos = mutablePos[0]; + } + + // if(numMutablePosParent > 1) { + // mutatedPos = mutablePos[gsl_rng_uniform_int(r, numMutablePosParent)]; + // } else { + // mutatedPos = mutablePos[0]; + // } + + +#ifdef DEBUGV + Rcpp::Rcout << "\n numMutablePosParent = " << numMutablePosParent; + Rcpp::Rcout << "\n mutatedPos = " << mutatedPos << "\n"; + +#endif + + // if(numMutablePos > 1) { + // mutatedPos = mutablePos[gsl_rng_uniform_int(r, numMutablePos)]; + // } else if (numMutablePos == 1) { + // mutatedPos = mutablePos[0]; + // } else { + // // Should never happen, as mutation = 0 if no mutable positions. + // throw std::out_of_range("Algo5: run out of mutable places!!??"); + // } + +} + + +static void remove_zero_sp_v7(std::vector& sp_to_remove, + std::vector& Genotypes, + std::vector& popParams, + std::multimap& mapTimes) { + // here("entering remove_zero_sp_v7"); + std::vector::iterator popParams_begin = popParams.begin(); + std::vector::iterator Genotypes_begin = Genotypes.begin(); + std::vector::reverse_iterator r = sp_to_remove.rbegin(); + int remove_this; + // for(r = sp_to_remove.rbegin(); r != sp_to_remove.rend(); ++r) { + while(r != sp_to_remove.rend() ) { + remove_this = *r; + mapTimes.erase(popParams[remove_this].pv); + popParams.erase(popParams_begin + remove_this); + Genotypes.erase(Genotypes_begin + remove_this); + ++r; + } + // here("exiting remove_zero_sp_v7"); + +} + +static inline int count_NDrivers(const Genotype64& Genotype, + const int& NumDrivers) { + int totalDr = 0; + for(int i = 0; i < NumDrivers; ++i) + totalDr += Genotype[i]; + return totalDr; +} + +static void totPopSize_and_fill_out_crude_P(int& outNS_i, + double& totPopSize, + double& lastStoredSample, + std::vector& genot_out, + //std::vector& sp_id_out, + std::vector& popSizes_out, + std::vector& index_out, + std::vector& time_out, + std::vector& sampleTotPopSize, + std::vector& sampleLargestPopSize, + std::vector& sampleMaxNDr, + std::vector& sampleNDrLargestPop, + bool& simulsDone, + bool& reachDetection, + int& lastMaxDr, + double& done_at, + const std::vector& Genotypes, + const std::vector& popParams, + const double& currentTime, + const int& NumDrivers, + const double& keepEvery, + const double& detectionSize, + const double& finalTime, + // const double& endTimeEvery, + const int& detectionDrivers, + const int& verbosity, + const double& minDetectDrvCloneSz, + const double& extraTime, + const double& fatalPopSize = 1e15) { + // Fill out, but also compute totPopSize + // and return sample summaries for popsize, drivers. + + // This determines if we are done or not by checking popSize, number of + // drivers, etc + + // static int lastMaxDr = 0; // preserves value across calls to Algo5 from R. + // so can not use it. + bool storeThis = false; + totPopSize = 0.0; + + // DP2(lastMaxDr); + // DP2(detectionDrivers); + // DP2(currentTime); + // DP2((lastStoredSample + endTimeEvery)); + // DP2(detectionSize); + + // this could all be part of popSize_over_m_dr, with a better name + int tmp_ndr = 0; + int max_ndr = 0; + double popSizeOverDDr = 0.0; + + for(size_t i = 0; i < popParams.size(); ++i) { + totPopSize += popParams[i].popSize; + tmp_ndr = count_NDrivers(Genotypes[i], NumDrivers); + if(tmp_ndr > max_ndr) max_ndr = tmp_ndr; + if(tmp_ndr >= detectionDrivers) popSizeOverDDr += popParams[i].popSize; + } + lastMaxDr = max_ndr; + + + if (keepEvery < 0) { + storeThis = false; + } else if( currentTime >= (lastStoredSample + keepEvery) ) { + storeThis = true; + } + + if( (totPopSize <= 0.0) || (currentTime >= finalTime) ) { + simulsDone = true; + } + + + // if( (totPopSize >= detectionSize) || + // ( (lastMaxDr >= detectionDrivers) && + // (popSizeOverDDr >= minDetectDrvCloneSz) ) ) { + // simulsDone = true; + // reachDetection = true; + // } + + if(extraTime > 0) { + if(done_at < 0) { + if( (totPopSize >= detectionSize) || + ( (lastMaxDr >= detectionDrivers) && + (popSizeOverDDr >= minDetectDrvCloneSz) ) ) { + done_at = currentTime + extraTime; + } + } else if (currentTime >= done_at) { + simulsDone = true; + reachDetection = true; + } + } else if( (totPopSize >= detectionSize) || + ( (lastMaxDr >= detectionDrivers) && + (popSizeOverDDr >= minDetectDrvCloneSz) ) ) { + simulsDone = true; + reachDetection = true; + } + + + + + // This is no longer used. + // // Beware: this can lead to never stopping if + // // decreases in popSize or drivers + + // // Logic: if a period k you meet any condition, recheck again at k + + // // endTimeEvery, and if conditions met exit. Prevents exiting if you + // // reach the cancer state almost by chance. But this is way too + // // paranoid. The idea is of application mainly for McF and Beeren + // // models, so we do not bail out as soon as just a single cell with one + // // new driver. But this makes things very slow. + + // // Thus, never pass an endTimeEvery > 0, but use detectionDrivers = 1 + + // // intended final Drivers. + + // // FIXME + // // Ideally, we would check, for McFL, that popsize of the pop with + // // required number of drivers is at least, say, > initSize. + // // But that is not trivial, as requires more accounting. Do later. + + + // if(endTimeEvery > 0) { + // if(done_at <= 0 ) { + // if( (totPopSize >= detectionSize) || + // (lastMaxDr >= detectionDrivers) ) + // done_at = currentTime + endTimeEvery; + // } else if (currentTime >= done_at) { + // if( (totPopSize >= detectionSize) || + // (lastMaxDr >= detectionDrivers) ) { + // simulsDone = true; + // reachDetection = true; + // } + // else + // done_at = -9; + // } + // } else if( (totPopSize >= detectionSize) || + // (lastMaxDr >= detectionDrivers) ) { + + // simulsDone = true; + // reachDetection = true; + // } + + + if(totPopSize >= fatalPopSize) { + Rcpp::Rcout << "\n\totPopSize > " << fatalPopSize + <<". You are likely to loose precision and run into numerical issues\n"; + } + + if(simulsDone) + storeThis = true; + + + if( storeThis ) { + lastStoredSample = currentTime; + outNS_i++; + int ndr_lp = 0; + double l_pop_s = 0.0; + + time_out.push_back(currentTime); + + for(size_t i = 0; i < popParams.size(); ++i) { + genot_out.push_back(Genotypes[i]); + popSizes_out.push_back(popParams[i].popSize); + index_out.push_back(outNS_i); + + if(popParams[i].popSize > l_pop_s) { + l_pop_s = popParams[i].popSize; + ndr_lp = count_NDrivers(Genotypes[i], NumDrivers); + } + } + sampleTotPopSize.push_back(totPopSize); + sampleLargestPopSize.push_back(l_pop_s); + sampleMaxNDr.push_back(max_ndr); + sampleNDrLargestPop.push_back(ndr_lp); + } + + + + // if( storeThis ) { + // lastStoredSample = currentTime; + // outNS_i++; + // int tmp_ndr = 0; + // int max_ndr = 0; + // int ndr_lp = 0; + // double l_pop_s = 0.0; + + // time_out.push_back(currentTime); + + // for(size_t i = 0; i < popParams.size(); ++i) { + // genot_out.push_back(Genotypes[i]); + // popSizes_out.push_back(popParams[i].popSize); + // index_out.push_back(outNS_i); + // // I have to repeat the counting of drivers here. + // tmp_ndr = count_NDrivers(Genotypes[i], NumDrivers); + // if(tmp_ndr > max_ndr) max_ndr = tmp_ndr; + // if(popParams[i].popSize > l_pop_s) { + // l_pop_s = popParams[i].popSize; + // ndr_lp = tmp_ndr; + // // ndr_lp = count_NDrivers(Genotypes[i], NumDrivers); + // } + // // lastMaxDr = max_ndr; // and this should have been out of the + // // popParams.size() loop + // } + // // lastMaxDr = max_ndr; + // sampleTotPopSize.push_back(totPopSize); + // sampleLargestPopSize.push_back(l_pop_s); + // sampleMaxNDr.push_back(max_ndr); + // sampleNDrLargestPop.push_back(ndr_lp); + // }// else if (keepEvery < 0) { + // // FIXME keepEvery + // // must keep track of results to bail out + + // // FIXME counting max drivers should be done always, like counting + // // totPopSize. + + // int tmp_ndr = 0; + // int max_ndr = 0; + + // for(size_t i = 0; i < popParams.size(); ++i) { + // tmp_ndr = count_NDrivers(Genotypes[i], NumDrivers); + // if(tmp_ndr > max_ndr) max_ndr = tmp_ndr; + // // lastMaxDr = max_ndr; + // } + // lastMaxDr = max_ndr; + // } + + + + + if( !std::isfinite(totPopSize) ) { + throw std::range_error("totPopSize not finite"); + } + if( std::isnan(totPopSize) ) { + throw std::range_error("totPopSize is NaN"); + } + + if(totPopSize > (4.0 * 1e15)) { + if(verbosity > 0) + Rcpp::Rcout << "\nWARNING: popSize > 4e15. Likely loss of precission\n"; + } +} + +// FIXME: I might want to return the actual drivers in each period +// and the actual drivers in the population with largest popsize +// Something like what we do now with whichDrivers +// and count_NumDrivers + + +// static inline void fill_SStats(Rcpp::NumericMatrix& perSampleStats, +// const std::vector& sampleTotPopSize, +// const std::vector& sampleLargestPopSize, +// const std::vector& sampleLargestPopProp, +// const std::vector& sampleMaxNDr, +// const std::vector& sampleNDrLargestPop){ + +// for(size_t i = 0; i < sampleTotPopSize.size(); ++i) { +// perSampleStats(i, 0) = sampleTotPopSize[i]; +// perSampleStats(i, 1) = sampleLargestPopSize[i]; // Never used in R FIXME: remove!! +// perSampleStats(i, 2) = sampleLargestPopProp[i]; // Never used in R +// perSampleStats(i, 3) = static_cast(sampleMaxNDr[i]); +// perSampleStats(i, 4) = static_cast(sampleNDrLargestPop[i]); +// } +// } + +inline void reshape_to_outNS(Rcpp::NumericMatrix& outNS, + const std::vector& uniqueGenotV, + const std::vector& genot_out_ul, + const std::vector& popSizes_out, + const std::vector& index_out, + const std::vector& time_out){ + + std::vector::const_iterator fbeg = uniqueGenotV.begin(); + std::vector::const_iterator fend = uniqueGenotV.end(); + + int column; + + for(size_t i = 0; i < genot_out_ul.size(); ++i) { + column = std::distance(fbeg, lower_bound(fbeg, fend, genot_out_ul[i]) ); + // here(" looping over i "); + outNS(index_out[i], column + 1) = popSizes_out[i]; + } + + for(size_t j = 0; j < time_out.size(); ++j) + outNS(j, 0) = time_out[j]; +} + +static inline void find_unique_genotypes(std::set& uniqueGenotypes, + const std::vector& genot_out_l) { + for(size_t i = 0; i < genot_out_l.size(); ++i) + uniqueGenotypes.insert( genot_out_l[i] ); +} + +static inline void genot_out_to_ullong(std::vector& go_l, + const std::vector& go) { + for(size_t i = 0; i < go.size(); ++i) + go_l[i] = go[i].to_ullong(); +} + + +static inline void uniqueGenotypes_to_vector(std::vector& ugV, + const std::set& uniqueGenotypes) { + ugV.assign(uniqueGenotypes.begin(), uniqueGenotypes.end() ); +} + + +static inline void create_returnGenotypes(Rcpp::IntegerMatrix& returnGenotypes, + const int& numGenes, + const std::vector& uniqueGenotypesV){ + // In C++, as the original were bitsets, pos 0 is at the right + // In R, pos 0 is at the left + + for(size_t i = 0; i < uniqueGenotypesV.size(); ++i) { + Genotype64 tmpbs(uniqueGenotypesV[i]); + for(int j = 0; j < numGenes; ++j) { + returnGenotypes(j, i) = tmpbs[j]; + } + } +} + +// FIXME: change this, now that we keep a count of drivers? +static inline void count_NumDrivers(int& maxNumDrivers, + std::vector& countByDriver, + Rcpp::IntegerMatrix& returnGenotypes, + const int& numDrivers){ + // Rcpp::IntegerVector& totDrivers){ + + // At the end, over all genotypes + // Using a returnGenotypes object as input + // Redundant? + maxNumDrivers = 0; + int tmpdr = 0; + + for(int j = 0; j < returnGenotypes.ncol(); ++j) { + tmpdr = 0; + for(int i = 0; i < numDrivers; ++i) { + tmpdr += returnGenotypes(i, j); + countByDriver[i] += returnGenotypes(i, j); + } + // totDrivers(j) = tmpdr; + if(tmpdr > maxNumDrivers) maxNumDrivers = tmpdr; + } +} + +static inline void whichDrivers(int& totalPresentDrivers, + std::string& strDrivers, + const std::vector& countByDriver){ + std::string comma = ""; + for(size_t i = 0; i < countByDriver.size(); ++i) { + if(countByDriver[i] > 0) { +#ifdef _WIN32 + strDrivers += (comma + SSTR(i + 1)); +#endif +#ifndef _WIN32 + strDrivers += (comma + std::to_string(i + 1)); //SSTR(i + 1)); +#endif + comma = ", "; + ++totalPresentDrivers; + } + } + if(totalPresentDrivers == 0) strDrivers = "NA"; +} + + +static void sample_all_pop_P(std::vector& sp_to_remove, + std::vector& popParams, + // next only used with DEBUGV + const std::vector& Genotypes, + const double& tSample, + const int& mutationPropGrowth){ + + // here("entering sample_all_pop_P"); + // currentTime = tSample; + sp_to_remove.clear(); + // sp_to_remove.push_back(0); + // sp_to_remove[0] = 0; + + for(size_t i = 0; i < popParams.size(); i++) { + //STOPASSERT(popParams[i].Flag == false); + STOPASSERT(popParams[i].timeLastUpdate >= 0.0); + STOPASSERT(tSample - popParams[i].timeLastUpdate >= 0.0); +#ifdef DEBUGV + Rcpp::Rcout << "\n\n ********* 5.9 ******\n " + << " Species = " << i + << "\n Genotype = " << Genotypes[i] + << "\n sp_id = " << Genotypes[i].to_ullong() // sp_id[i] + << "\n pre-update popSize = " + << popParams[i].popSize + << "\n time of sample = " << tSample + << "\n popParams[i].timeLastUpdate = " + << popParams[i].timeLastUpdate + << ";\n t for Algo2 = " + << tSample - popParams[i].timeLastUpdate + << " \n species R " << popParams[i].R + << " \n species W " << popParams[i].W + << " \n species death " << popParams[i].death + << " \n species birth " << popParams[i].birth; + // << " \n species nextMutationTime " + // << popParams[i].nextMutationTime; +#endif + + // Account for forceSampling. When + // forceSampling, popSize for at least one species + // was updated in previous loop, so we skip that one + if(tSample > popParams[i].timeLastUpdate) { + popParams[i].popSize = + Algo2_st(popParams[i], tSample, mutationPropGrowth); + } + if( popParams[i].popSize <= 0.0 ) { + // this i has never been non-zero in any sampling time + // eh?? + + // If it is 0 here, remove from _current_ population. Anything that + // has had a non-zero size at sampling time is preserved (if it + // needs to be preserved, because it is keepEvery time). + + // sp_to_remove[0]++; + sp_to_remove.push_back(i); + // sp_to_remove[sp_to_remove[0]] = i; +#ifdef DEBUGV + Rcpp::Rcout << "\n\n Removing species i = " << i + << " with sp_id = " << Genotypes[i].to_ullong(); //sp_id[i]; +#endif + } // else { + // popParams[i].Flag = true; + // } +#ifdef DEBUGV + Rcpp::Rcout << "\n\n post-update popSize = " + << popParams[i].popSize << "\n"; +#endif + } + // here("exiting sample_all_pop_P"); +} + + +static void innerBNB(const int& numGenes, + const double& initSize, + const double& K, + // const double& alpha, + const std::string& typeCBN, + // const double& genTime, + const std::string& typeFitness, + const int& mutationPropGrowth, + const double& mu, + const double& sh, + const double& s, + const double& death, + // const double& birthRate, + const double& keepEvery, + const double& sampleEvery, + const int& numDrivers, + const int& initMutant, + const time_t& start_time, + const double& maxWallTime, + const double& finalTime, + const double& detectionSize, + // const double& endTimeEvery, + const int& detectionDrivers, + const double& minDetectDrvCloneSz, + const double& extraTime, + const int& verbosity, + double& totPopSize, + double& em1, + double& em1sc, + // double& n_1, + // double& en1, + double& ratioForce, + double& currentTime, + int& speciesFS, + int& outNS_i, + int& iter, + std::vector& genot_out, + std::vector& popSizes_out, + std::vector& index_out, + std::vector& time_out, + std::vector& sampleTotPopSize, + std::vector& sampleLargestPopSize, + std::vector& sampleMaxNDr, + std::vector& sampleNDrLargestPop, + bool& reachDetection, + std::mt19937& ran_generator, + double& runningWallTime, + bool& hittedWallTime, + Rcpp::IntegerMatrix restrictTable) { + //bool& anyForceRerunIssues + // if(numRuns > 0) { + + double dummyMutationRate = std::max(mu/1000, 1e-13); + // double dummyMutationRate = 1e-10; + // ALWAYS initialize this here, or reinit or rezero + genot_out.clear(); + popSizes_out.clear(); + index_out.clear(); + time_out.clear(); + totPopSize = 0.0; + sampleTotPopSize.clear(); + currentTime = 0.0; + iter = 0; + + outNS_i = -1; + + sampleTotPopSize.clear(); + sampleLargestPopSize.clear(); + sampleMaxNDr.clear(); + sampleNDrLargestPop.clear(); + // end of rezeroing. + + + // } + // anyForceRerunIssues = false; + + bool forceSample = false; + bool simulsDone = false; + double lastStoredSample = 0.0; + + + double minNextMutationTime; + double mutantTimeSinceLastUpdate; + double timeNextPopSample; + double tSample; + + int nextMutant; + unsigned int numSpecies = 0; + int numMutablePosParent = 0; + int mutatedPos = 0; + //int indexMutatedPos = 0; + + unsigned int sp = 0; + //int type_resize = 0; + + int iterL = 1000; + int speciesL = 1000; + //int timeL = 1000; + + double tmpdouble1 = 0.0; + double tmpdouble2 = 0.0; + + std::vectorsp_to_remove(1); + sp_to_remove.reserve(10000); + + // those to update + int to_update = 1; //1: one species; 2: 2 species; 3: all. + int u_1 = -99; + int u_2 = -99; + + Genotype64 newGenotype; + std::vector Genotypes(1); + Genotypes[0].reset(); + + std::vector popParams(1); + + // // FIXME debug + // Rcpp::Rcout << "\n popSize[0] at 1 "; + // print_spP(popParams[0]); + // // end debug + + + const int sp_per_period = 5000; + popParams.reserve(sp_per_period); + Genotypes.reserve(sp_per_period); + + + // // FIXME debug + // Rcpp::Rcout << "\n popSize[0] at 01 "; + // print_spP(popParams[0]); + // // end debug + + + spParamsP tmpParam; + init_tmpP(tmpParam); + init_tmpP(popParams[0]); + popParams[0].popSize = initSize; + totPopSize = initSize; + + + // // FIXME debug + // Rcpp::Rcout << "\n popSize[0] at 10000 "; + // print_spP(popParams[0]); + // // end debug + + + + std::vectormutablePos(numGenes); // could be inside getMuatedPos_bitset + + // multimap to hold nextMutationTime + std::multimap mapTimes; + //std::multimap::iterator m1pos; + + int ti_dbl_min = 0; + int ti_e3 = 0; + + + // // FIXME debug + // Rcpp::Rcout << "\n popSize[0] at 10002 "; + // print_spP(popParams[0]); + // // end debug + + + + // // Beerenwinkel + // double adjust_fitness_B = -std::numeric_limits::infinity(); + //McFarland + double adjust_fitness_MF = -std::numeric_limits::infinity(); + + // for McFarland error + em1 = 0.0; + em1sc = 0.0; + // n_0 = 0.0; + // n_1 = 0.0; + // double tps_0; //, tps_1; + // tps_0 = totPopSize; + // tps_1 = totPopSize; + + + double totPopSize_previous = totPopSize; + double DA_previous = log1p(totPopSize_previous/K); + + // // FIXME debug + // Rcpp::Rcout << "\n popSize[0] at 10004 "; + // print_spP(popParams[0]); + // // end debug + + + + int lastMaxDr = 0; + double done_at = -9; + +#ifdef MIN_RATIO_MUTS + g_min_birth_mut_ratio = DBL_MAX; + g_min_death_mut_ratio = DBL_MAX; + g_tmp1 = DBL_MAX; +#endif + + // // FIXME debug + // Rcpp::Rcout << " popSize[0] at 1b "; + // print_spP(popParams[0]); + // // end debug + + // This long block, from here to X1, is ugly and a mess! + // This is what takes longer to figure out whenever I change + // anything. FIXME!! + + if(initMutant >= 0) + throw std::invalid_argument("initMutant no longer allowed. Bug in R code."); + // if(initMutant >= 0) { + // popParams[0].numMutablePos = numGenes - 1; + // Genotypes[0].set(initMutant); + // // if(typeFitness == "beerenwinkel") { + // // popParams[0].death = 1.0; //note same is in McFarland. + // // // But makes sense here; adjustment in beerenwinkel is via fitness + + // // // initialize to prevent birth/mutation warning with Beerenwinkel + // // // when no mutator. O.w., the defaults + // // if(!mutationPropGrowth) + // // popParams[0].mutation = mu * popParams[0].numMutablePos; + // // popParams[0].absfitness = 1.0 + s; + // // updateRatesBeeren(popParams, adjust_fitness_B, initSize, + // // currentTime, alpha, initSize, + // // mutationPropGrowth, mu); + // // } else if(typeFitness == "mcfarland0") { + // // // death equal to birth of a non-mutant. + // // popParams[0].death = log1p(totPopSize/K); // log(2.0), except rare cases + // // if(!mutationPropGrowth) + // // popParams[0].mutation = mu * popParams[0].numMutablePos; + // // popParams[0].absfitness = 1.0 + s; + // // updateRatesMcFarland0(popParams, adjust_fitness_MF, K, + // // totPopSize, + // // mutationPropGrowth, mu); + // // } else if(typeFitness == "mcfarland") { + // // popParams[0].death = totPopSize/K; + // // popParams[0].birth = 1.0 + s; + // // } else if(typeFitness == "mcfarlandlog") { + // if(typeFitness == "mcfarlandlog") { + // popParams[0].death = log1p(totPopSize/K); + // popParams[0].birth = 1.0 + s; + // } else if(typeFitness == "bozic1") { + // tmpParam.birth = 1.0; + // tmpParam.death = -99.9; + // // } else if (typeFitness == "bozic2") { + // // tmpParam.birth = -99; + // // tmpParam.death = -99; + // } else if (typeFitness == "exp") { + // tmpParam.birth = -99; + // tmpParam.death = death; + // } // else { // linear or log + // // tmpParam.birth = -99; + // // tmpParam.death = death; + // // } + // // if( (typeFitness != "beerenwinkel") && (typeFitness != "mcfarland0") + // // && (typeFitness != "mcfarland") && (typeFitness != "mcfarlandlog")) // wouldn't matter + // // fitness(popParams[0], tmpParam, initMutant, restrictTable, + // // typeCBN, Genotypes[0], birthRate, s, numDrivers, + // // typeFitness, genTime, adjust_fitness_B, sh, + // // adjust_fitness_MF); + + // if( typeFitness != "mcfarlandlog") // wouldn't matter + // fitness(popParams[0], tmpParam, initMutant, restrictTable, + // typeCBN, Genotypes[0], // birthRate, + // s, numDrivers, + // typeFitness, // genTime, adjust_fitness_B, + // sh); + // // adjust_fitness_MF); + // // we pass as the parent the tmpParam; it better initialize + // // everything right, or that will blow. Reset to init + // init_tmpP(tmpParam); + // } else { + popParams[0].numMutablePos = numGenes; + // if(typeFitness == "beerenwinkel") { + // popParams[0].death = 1.0; + // // initialize to prevent birth/mutation warning with Beerenwinkel + // // when no mutator. O.w., the defaults + // if(!mutationPropGrowth) + // popParams[0].mutation = mu * popParams[0].numMutablePos; + // popParams[0].absfitness = 1.0; + // updateRatesBeeren(popParams, adjust_fitness_B, initSize, + // currentTime, alpha, initSize, + // mutationPropGrowth, mu); + // } else if(typeFitness == "mcfarland0") { + // popParams[0].death = log1p(totPopSize/K); + // if(!mutationPropGrowth) + // popParams[0].mutation = mu * popParams[0].numMutablePos; + // popParams[0].absfitness = 1.0; + // updateRatesMcFarland0(popParams, adjust_fitness_MF, K, + // totPopSize, + // mutationPropGrowth, mu); + // } else if(typeFitness == "mcfarland") { + // popParams[0].birth = 1.0; + // popParams[0].death = totPopSize/K; + // // no need to call updateRates + // } else if(typeFitness == "mcfarlandlog") { + if(typeFitness == "mcfarlandlog") { + popParams[0].birth = 1.0; + popParams[0].death = log1p(totPopSize/K); + // no need to call updateRates + } else if(typeFitness == "bozic1") { + popParams[0].birth = 1.0; + popParams[0].death = 1.0; + // } else if (typeFitness == "bozic2") { + // popParams[0].birth = 0.5/genTime; + // popParams[0].death = 0.5/genTime; + } else if (typeFitness == "exp") { + popParams[0].birth = 1.0; + popParams[0].death = death; + } // else { // linear or log + // popParams[0].birth = birthRate; + // popParams[0].death = death; + // } + // } + + + // // FIXME debug + // Rcpp::Rcout << " popSize[0] at 2 "; + // print_spP(popParams[0]); + // // end debug + + + + // these lines (up to, and including, R_F_st) + // not needed with mcfarland0 or beerenwinkel + if(mutationPropGrowth) + popParams[0].mutation = mu * popParams[0].birth * popParams[0].numMutablePos; + else + popParams[0].mutation = mu * popParams[0].numMutablePos; + + W_f_st(popParams[0]); + R_f_st(popParams[0]); + + // // FIXME debug + // Rcpp::Rcout << " popSize[0] at 3 "; + // print_spP(popParams[0]); + // // end debug + + + // X1: end of mess of initialization block + + popParams[0].pv = mapTimes.insert(std::make_pair(-999, 0)); + + if( keepEvery > 0 ) { + // We keep the first ONLY if we are storing more than one. + outNS_i++; + time_out.push_back(currentTime); + + genot_out.push_back(Genotypes[0]); + popSizes_out.push_back(popParams[0].popSize); + index_out.push_back(outNS_i); + + sampleTotPopSize.push_back(popParams[0].popSize); + sampleLargestPopSize.push_back(popParams[0].popSize); + sampleMaxNDr.push_back(count_NDrivers(Genotypes[0], numDrivers)); + sampleNDrLargestPop.push_back(sampleMaxNDr[0]); + } + // FIXME: why next line and not just genot_out.push_back(Genotypes[i]); + // if keepEvery > 0? We do that already. + // It is just ugly to get a 0 in that first genotype when keepEvery < 0 + // uniqueGenotypes.insert(Genotypes[0].to_ullong()); + timeNextPopSample = currentTime + sampleEvery; + numSpecies = 1; + + +#ifdef DEBUGV + Rcpp::Rcout << "\n the initial species\n"; + print_spP(popParams[0]); +#endif + + + // // FIXME debug + // Rcpp::Rcout << " popSize[0] at 4 "; + // print_spP(popParams[0]); + // // end debug + + + + while(!simulsDone) { + // Check how we are doing with time as first thing. + runningWallTime = difftime(time(NULL), start_time); + if( runningWallTime > maxWallTime ) { + hittedWallTime = true; + forceSample = true; + simulsDone = true; + } + + iter++; + if(verbosity > 1) { + if(! (iter % iterL) ) { + Rcpp::Rcout << "\n\n ... iteration " << iter; + Rcpp::Rcout << "\n ... currentTime " << currentTime <<"\n"; + } + if(!(numSpecies % speciesL )) { + Rcpp::Rcout << "\n\n ... iteration " << iter; + Rcpp::Rcout << "\n\n ... numSpecies " << numSpecies << "\n"; + } + } + + // ************ 5.2 *************** + if(verbosity >= 2) + Rcpp::Rcout <<"\n\n\n*** Looping through 5.2. Iter = " << iter << " \n"; + + tSample = std::min(timeNextPopSample, finalTime); + +#ifdef DEBUGV + Rcpp::Rcout << " DEBUGV\n"; + Rcpp::Rcout << "\n ForceSample? " << forceSample + << " tSample " << tSample + << " currentTime " << currentTime; +#endif + + if(iter == 1) { // handle special case of first iter + // // FIXME debug + // Rcpp::Rcout << " popSize[0] "; + // print_spP(popParams[0]); + // // end debug + tmpdouble1 = ti_nextTime_tmax_2_st(popParams[0], + currentTime, + tSample, + ti_dbl_min, ti_e3); + mapTimes_updateP(mapTimes, popParams, 0, tmpdouble1); + //popParams[0].Flag = false; + popParams[0].timeLastUpdate = currentTime; + } else { // any other iter + if(to_update == 1) { + // we did not sample in previous period. + tmpdouble1 = ti_nextTime_tmax_2_st(popParams[u_1], + currentTime, + tSample, + ti_dbl_min, ti_e3); + mapTimes_updateP(mapTimes, popParams, u_1, tmpdouble1); + popParams[u_1].timeLastUpdate = currentTime; + +#ifdef DEBUGV + Rcpp::Rcout << "\n\n ********* 5.2: call to ti_nextTime ******\n For to_update = \n " + << " tSample = " << tSample + + << "\n\n** Species = " << u_1 + << "\n genotype = " << Genotypes[u_1] + << "\n popSize = " << popParams[u_1].popSize + << "\n currentTime = " << currentTime + << "\n popParams[i].nextMutationTime = " + << tmpdouble1 + << " \n species R " << popParams[u_1].R + << " \n species W " << popParams[u_1].W + << " \n species death " << popParams[u_1].death + << " \n species birth " << popParams[u_1].birth; +#endif + + } else if(to_update == 2) { + // we did not sample in previous period. + tmpdouble1 = ti_nextTime_tmax_2_st(popParams[u_1], + currentTime, + tSample, ti_dbl_min, ti_e3); + mapTimes_updateP(mapTimes, popParams, u_1, tmpdouble1); + tmpdouble2 = ti_nextTime_tmax_2_st(popParams[u_2], + currentTime, + tSample, ti_dbl_min, ti_e3); + mapTimes_updateP(mapTimes, popParams, u_2, tmpdouble2); + popParams[u_1].timeLastUpdate = currentTime; + popParams[u_2].timeLastUpdate = currentTime; + +#ifdef DEBUGV + Rcpp::Rcout << "\n\n ********* 5.2: call to ti_nextTime ******\n " + << " tSample = " << tSample + + << "\n\n** Species = " << u_1 + << "\n genotype = " << Genotypes[u_1] + << "\n popSize = " << popParams[u_1].popSize + << "\n currentTime = " << currentTime + << "\n popParams[i].nextMutationTime = " + << tmpdouble1 + << " \n species R " << popParams[u_1].R + << " \n species W " << popParams[u_1].W + << " \n species death " << popParams[u_1].death + << " \n species birth " << popParams[u_1].birth + + + << "\n\n** Species = " << u_2 + << "\n genotype = " << Genotypes[u_2] + << "\n popSize = " << popParams[u_2].popSize + << "\n currentTime = " << currentTime + << "\n popParams[i].nextMutationTime = " + << tmpdouble2 + << " \n species R " << popParams[u_2].R + << " \n species W " << popParams[u_2].W + << " \n species death " << popParams[u_2].death + << " \n species birth " << popParams[u_2].birth; + +#endif + + } else { // we sampled, so update all + for(size_t i = 0; i < popParams.size(); i++) { + tmpdouble1 = ti_nextTime_tmax_2_st(popParams[i], + currentTime, + tSample, ti_dbl_min, ti_e3); + mapTimes_updateP(mapTimes, popParams, i, tmpdouble1); + popParams[i].timeLastUpdate = currentTime; + +#ifdef DEBUGV + Rcpp::Rcout << "\n\n ********* 5.2: call to ti_nextTime ******\n " + << " Species = " << i + << "\n genotype = " << Genotypes[i] + << "\n popSize = " << popParams[i].popSize + << "\n currentTime = " << currentTime + // << "\n popParams[i].nextMutationTime = " + // << popParams[i].nextMutationTime + << " \n species R " << popParams[i].R + << " \n species W " << popParams[i].W + << " \n species death " << popParams[i].death + << " \n species birth " << popParams[i].birth; +#endif + } + } + } + if(forceSample) { + // A VERY ugly hack. Resetting tSample to jump to sampling. + tSample = currentTime; + // Need this, o.w. would skip a sampling. + timeNextPopSample = currentTime; + } + + + // ******************** 5.3 and do we sample? *********** + // Find minimum to know if we need to sample the whole pop + getMinNextMutationTime4(nextMutant, minNextMutationTime, + mapTimes); + + if(verbosity >= 2) { + Rcpp::Rcout << "\n\n iteration " << iter << "; minNextMutationTime = " + << minNextMutationTime + << "; timeNextPopSample = " << timeNextPopSample + << "; popParams.size() = " << popParams.size() << "\n"; + } + + // Do we need to sample the population? + if( minNextMutationTime <= tSample ) {// We are not sampling + // ************ 5.3 ************** + currentTime = minNextMutationTime; + + // ************ 5.4 *************** + mutantTimeSinceLastUpdate = currentTime - + popParams[nextMutant].timeLastUpdate; + + popParams[nextMutant].popSize = Algo3_st(popParams[nextMutant], + mutantTimeSinceLastUpdate); + + + + + if(popParams[nextMutant].popSize > (ratioForce * detectionSize)) { + forceSample = true; + ratioForce = std::min(1.0, 2 * ratioForce); +#ifdef DEBUGV + //if(verbosity > -2) { + // We always warn about this, since interaction with ti==0 + Rcpp::Rcout << "\n Forced sampling triggered for next loop: \n " << + " popParams[nextMutant].popSize = " << + popParams[nextMutant].popSize << " > ratioForce * detectionSize \n"; + Rcpp::Rcout << " when nextMutant = " << nextMutant << + " at iteration " << iter << "\n"; + //} +#endif + } + // Check also for numSpecies, and force sampling if needed + // This is very different from the other algos, as we do not yet + // know total number of different species. + // This is a protection against things going wild. Should + // not happen in regular usage. + if(! (numSpecies % speciesFS )) { + forceSample = true; + speciesFS *= 2; +#ifdef DEBUGV + //if(verbosity > -2) // we always warn about this + + Rcpp::Rcout << "\n Forced sampling triggered for next loop " + << " when numSpecies = " << + numSpecies << " at iteration " << iter << "\n"; +#endif + } + // Why are these lines here instead of somewhere else? + // Right before the if for sampling or not? + // FIXME + // runningWallTime = difftime(time(NULL), start_time); + // if( runningWallTime > maxWallTime ) { + // hittedWalllTime = true; + // forceSample = true; + // simulsDone = true; + // } + + + if(popParams[nextMutant].numMutablePos != 0) { + // this is the usual case. The alternative is the dummy or null mutation + + + // ************ 5.5 *************** + getMutatedPos_bitset(mutatedPos, numMutablePosParent, // r, + ran_generator, + mutablePos, + Genotypes[nextMutant], + numGenes); + + // ************ 5.6 *************** + + newGenotype = Genotypes[nextMutant]; + newGenotype.set(mutatedPos); + // newGenotype[mutatedPos] = 1; + + new_sp_bitset(sp, newGenotype, Genotypes); + + if(sp == numSpecies) {// New species + ++numSpecies; + init_tmpP(tmpParam); + + if(verbosity >= 2) { + Rcpp::Rcout <<"\n Creating new species " << (numSpecies - 1) + << " from species " << nextMutant; + } + + tmpParam.popSize = 1; + + fitness(tmpParam, popParams[nextMutant], mutatedPos, + restrictTable, + typeCBN, newGenotype, // birthRate, + s, + numDrivers, typeFitness, // genTime, adjust_fitness_B, + sh); //, adjust_fitness_MF); + + + if(tmpParam.birth > 0.0) { + tmpParam.numMutablePos = numMutablePosParent - 1; + if(mutationPropGrowth) + tmpParam.mutation = mu * tmpParam.birth * tmpParam.numMutablePos; + // tmpParam.mutation = mu * tmpParam.birth * (numMutablePosParent - 1); + else + tmpParam.mutation = mu * tmpParam.numMutablePos; + //tmpParam.mutation = mu * (numMutablePosParent - 1); + if (tmpParam.mutation > 1 ) + Rcpp::Rcout << "WARNING: mutation > 1\n"; + if (numMutablePosParent == 1) { + Rcpp::Rcout << "Note: mutation = 0; no positions left for mutation\n"; + tmpParam.mutation = dummyMutationRate; // dummy mutation here. Set some mu. + } + W_f_st(tmpParam); + R_f_st(tmpParam); + tmpParam.timeLastUpdate = -99999.99999; //mapTimes_updateP does what it should. + popParams.push_back(tmpParam); + Genotypes.push_back(newGenotype); + to_update = 2; +#ifdef MIN_RATIO_MUTS + g_tmp1 = tmpParam.birth/tmpParam.mutation; + if(g_tmp1 < g_min_birth_mut_ratio) g_min_birth_mut_ratio = g_tmp1; + + g_tmp1 = tmpParam.death/tmpParam.mutation; + if(g_tmp1 < g_min_death_mut_ratio) g_min_death_mut_ratio = g_tmp1; +#endif + } else {// fitness is 0, so we do not add it + --sp; + --numSpecies; + to_update = 1; + } + //#ifdef DEBUGV + if(verbosity >= 3) { + Rcpp::Rcout << " \n\n\n Looking at NEW species " << sp << " at creation"; + Rcpp::Rcout << "\n Genotype = " << newGenotype; //Genotypes[sp]; + Rcpp::Rcout << "\n sp_id = " << newGenotype.to_ullong() ; + //Genotypes[sp].to_ullong(); + Rcpp::Rcout << "\n birth of sp = " << tmpParam.birth; + Rcpp::Rcout << "\n death of sp = " << tmpParam.death; + Rcpp::Rcout << "\n s = " << s; + Rcpp::Rcout << "\n parent birth = " << popParams[nextMutant].birth; + Rcpp::Rcout << "\n parent death = " << popParams[nextMutant].death; + Rcpp::Rcout << "\n parent Genotype = " << Genotypes[nextMutant]; + print_spP(tmpParam); + } + //#endif + } else { // A mutation to pre-existing species +#ifdef DEBUGW + if( (currentTime - popParams[sp].timeLastUpdate) <= 0.0) { + DP2(currentTime); + DP2(sp); + DP2(popParams[sp].timeLastUpdate); + print_spP(popParams[sp]); + throw std::out_of_range("currentTime - timeLastUpdate out of range. Serious bug"); + } +#endif + + if(verbosity >= 2) { + Rcpp::Rcout <<"\n Mutated to existing species " << sp + << " (Genotype = " << Genotypes[sp] + << "; sp_id = " << Genotypes[sp].to_ullong() << ")" + << "\n from species " << nextMutant + << " (Genotypes = " << Genotypes[nextMutant] + << "; sp_id = " << Genotypes[sp].to_ullong() << ")"; + } + + // FIXME00: the if can be removed?? + if(popParams[sp].popSize > 0.0) { + popParams[sp].popSize = 1.0 + + Algo2_st(popParams[sp], currentTime, mutationPropGrowth); + if(verbosity >= 2) { + Rcpp::Rcout << "\n New popSize = " << popParams[sp].popSize << "\n"; + } + } else { + throw std::range_error("\n popSize == 0 but existing? \n"); + } + +#ifdef DEBUGW + // This is wrong!!! if we set it to -999999, then the time to + // next mutation will not be properly updated. In fact, the + // mapTimes map becomes a mess because the former pv in the + // popParams is not removed so we end up inserting another pair + // for the same species. + + // popParams[sp].timeLastUpdate = -99999.99999; // to catch errors +#endif + //popParams[sp].Flag = true; + } + // *************** 5.7 *************** + // u_2 irrelevant if to_update = 1; + u_1 = nextMutant; + u_2 = static_cast(sp); + } else { // the null or dummy mutation case + // Rcpp::Rcout << "\n null mutation; before popSize" << std::endl; + // DP2(popParams[nextMutant].popSize); + ++popParams[nextMutant].popSize; + to_update = 1; + u_1 = nextMutant; + u_2 = -99; + // FIXME: do this conditionally on flag + Rcpp::Rcout << "Note: updating in null mutation\n"; + // Rcpp::Rcout << "\n null mutation; after popSize" << std::endl; + // DP2(popParams[nextMutant].popSize); + // Rcpp::Rcout << "\n done null mutation; after popSize ********" << std::endl; + } + } + else { // *********** We are sampling ********** + to_update = 3; //short_update = false; + if(verbosity >= 2) { + Rcpp::Rcout <<"\n We are SAMPLING"; + if(tSample < finalTime) { + Rcpp::Rcout << " at time " << tSample << "\n"; + } else + Rcpp::Rcout <<". We reached finalTime " << finalTime << "\n"; + } + + currentTime = tSample; + if(verbosity >= 3) + Rcpp::Rcout << "\n popParams.size() before sampling " << popParams.size() << "\n"; + + sample_all_pop_P(sp_to_remove, + popParams, Genotypes, tSample, mutationPropGrowth); + timeNextPopSample += sampleEvery; + + if(sp_to_remove.size()) + remove_zero_sp_v7(sp_to_remove, Genotypes, popParams, mapTimes); + + numSpecies = popParams.size(); + + totPopSize_and_fill_out_crude_P(outNS_i, totPopSize, + lastStoredSample, + genot_out, + //sp_id_out, + popSizes_out, index_out, + time_out, + sampleTotPopSize,sampleLargestPopSize, + sampleMaxNDr, sampleNDrLargestPop, + simulsDone, + reachDetection, + lastMaxDr, + done_at, + Genotypes, popParams, + currentTime, + numDrivers, + keepEvery, + detectionSize, + finalTime, + //endTimeEvery, + detectionDrivers, + verbosity, + minDetectDrvCloneSz, + extraTime); //keepEvery is for thinning + if(verbosity >= 3) { + Rcpp::Rcout << "\n popParams.size() before sampling " << popParams.size() + << "\n totPopSize after sampling " << totPopSize << "\n"; + } + + // computeMcFarlandError(e1, n_0, tps_0, + // typeFitness, totPopSize, K); //, initSize); + computeMcFarlandError_new(em1, em1sc, totPopSize_previous, DA_previous, + typeFitness, totPopSize, K); + // Largest error in McFarlands' method + // if( (typeFitness == "mcfarland0") || + // (typeFitness == "mcfarland") || + // (typeFitness == "mcfarlandlog") ) { + // tps_1 = totPopSize; + // if(typeFitness == "mcfarland") + // etmp = abs( tps_1 - (tps_0 + 1) ); + // else { + // if( (tps_0 + 1.0) > tps_1 ) + // etmp = (K + tps_0 + 1.0)/(K + tps_1); + // else + // etmp = (K + tps_1)/(K + tps_0 + 1); + // } + // if(etmp > e1) { + // e1 = etmp; + // n_0 = tps_0; + // n_1 = tps_1; + // } + // tps_0 = tps_1; + // } + + // It goes here: zz: not detectionSize, + // but the keepEvery? or sampleUntilKeep. Yes, use that. + // endingSampleEvery. + + // Use driver criterion here!!! + // if endingSampleEvery + // if totPopSize >= detectionSize: + // do not break unless + // tSample %% endingSampleEvery + + // All of this has to be in totPopSize_and_fill + + + // this if not sampleUntilKeep + // if( (totPopSize >= detectionSize) || + // (totPopSize <= 0.0) || (tSample >= finalTime)) { + // simulsDone = true; + // break; // skip last update if beerenwinkel + // } + + if(simulsDone) + break; //skip last updateRates + + // if( (typeFitness == "beerenwinkel") ) { + // updateRatesBeeren(popParams, adjust_fitness_B, + // initSize, currentTime, alpha, totPopSize, + // mutationPropGrowth, mu); + // } else if( (typeFitness == "mcfarland0") ) { + // updateRatesMcFarland0(popParams, adjust_fitness_MF, + // K, totPopSize, + // mutationPropGrowth, mu); + // } else if( (typeFitness == "mcfarland") ) { + // updateRatesMcFarland(popParams, adjust_fitness_MF, + // K, totPopSize); + // } else if( (typeFitness == "mcfarlandlog") ) { + if( (typeFitness == "mcfarlandlog") ) { + updateRatesMcFarlandLog(popParams, adjust_fitness_MF, + K, totPopSize); + } + +#ifdef MIN_RATIO_MUTS + // could go inside sample_all_pop but here we are sure death, etc, current + // But I catch them when they are created. Is this really needed? + for(size_t i = 0; i < popParams.size(); i++) { + g_tmp1 = popParams[i].birth/popParams[i].mutation; + if(g_tmp1 < g_min_birth_mut_ratio) g_min_birth_mut_ratio = g_tmp1; + + g_tmp1 = popParams[i].death/popParams[i].mutation; + if(g_tmp1 < g_min_death_mut_ratio) g_min_death_mut_ratio = g_tmp1; + } +#endif + + forceSample = false; + } + } +} + + +// [[Rcpp::export]] +Rcpp::List BNB_Algo5(Rcpp::IntegerMatrix restrictTable, + int numDrivers, + int numGenes, + Rcpp::CharacterVector typeCBN_, + double s, + double death, + double mu, + double initSize, + double sampleEvery, + double detectionSize, + double finalTime, + int initSp, + int initIt, + int seed, + int verbosity, + int speciesFS, + double ratioForce, + Rcpp::CharacterVector typeFitness_, + int maxram, + int mutationPropGrowth, + int initMutant, + double maxWallTime, + double keepEvery, + double sh, + double K, + int detectionDrivers, + bool onlyCancer, + bool errorHitWallTime, + int maxNumTries, + bool errorHitMaxTries, + double minDetectDrvCloneSz, + double extraTime + ) { + //BEGIN_RCPP + // using namespace Rcpp; + precissionLoss(); + const std::string typeFitness = Rcpp::as(typeFitness_); // no need to do [0] + const std::string typeCBN = Rcpp::as(typeCBN_); // no need to do [0] + // const double genTime = 4.0; // should be a parameter. For Bozic only. + + // const IntegerMatrix restrictTable(restrictTable_); + // const int numDrivers = as(numDrivers_); + // const int numGenes = as(numGenes_); + // const std::string typeCBN = as(typeCBN_); + // const std::string typeFitness = as(typeFitness_); + // // birth and death are irrelevant with Bozic + // const double birthRate = as(birthRate_); + // const double death = as(death_); + // const double s = as(s_); + // const double mu = as(mu_); + // const double initSize = as(initSize_); + // const double sampleEvery = as(sampleEvery_); + // const double detectionSize = as(detectionSize_); + // const double finalTime = as(finalTime_); + // const int initSp = as(initSize_species_); + // const int initIt = as(initSize_iter_); // FIXME: this is a misnomer + // const int verbosity = as(verbose_); + // // const double minNonZeroMut = mu * 0.01; // to avoid == 0.0 comparisons + // double ratioForce = as(ratioForce_); // If a single species this times + // // detectionSize, force a sampling to prevent going too far. + // int speciesFS = as(speciesFS_); // to force sampling when too many + // // species + // const int seed = as(seed_gsl_); + // const long maxram = as(maxram_); + // const int mutationPropGrowth = as(mutationPropGrowth_); + // const int initMutant = as(initMutant_); + // const double maxWallTime = as(maxWallTime_); + // const double keepEvery = as(keepEvery_); + + + // const double alpha = as(alpha_); + // const double sh = as(sh_); // coeff for fitness + // // if a driver without dependencies. Like in Datta et al., 2013. + // const double K = as(K_); //for McFarland + // //const double endTimeEvery = as(endTimeEvery_); + // const int detectionDrivers = as(detectionDrivers_); + + // // const bool errorFinalTime = as(errorFinalTime_); + // const bool errorHitWallTime = as(errorHitWallTime_); + // const bool onlyCancer = as(onlyCancer_); + // const int maxNumTries = as(maxNumTries_); + // const bool errorHitMaxTries = as(errorHitMaxTries_); + // const double minDetectDrvCloneSz = as(minDetectDrvCloneSz_); + // const double extraTime = as(extraTime_); + + // C++11 random number + std::mt19937 ran_generator(seed); + + // some checks. Do this systematically + // FIXME: do only if mcfarland! + if(K < 1 ) + throw std::range_error("K < 1."); + // verify we are OK with usigned long long + if( !(static_cast(std::numeric_limits::max()) + >= pow(2, 64)) ) + throw std::range_error("The size of unsigned long long is too short."); + if(numGenes > 64) + throw std::range_error("This version only accepts up to 64 genes. Should be caught in R"); + + bool runAgain = true; + bool reachDetection = false; + //Output + std::vector genot_out; + std::vector popSizes_out; + std::vector index_out; + std::vector time_out; //only one entry per period! + genot_out.reserve(initSp); + popSizes_out.reserve(initSp); + index_out.reserve(initSp); + time_out.reserve(initIt); + + double totPopSize = 0; + std::vector sampleTotPopSize; + std::vector sampleLargestPopSize; + std::vector sampleMaxNDr; //The number of drivers in the population + // with the largest number of drivers; and this for each time sample + std::vector sampleNDrLargestPop; //Number of drivers in population + // with largest size (at each time sample) + sampleTotPopSize.reserve(initIt); + sampleLargestPopSize.reserve(initIt); + sampleMaxNDr.reserve(initIt); + sampleNDrLargestPop.reserve(initIt); + + + int outNS_i = -1; // the column in the outNS + // time limits + // FIXME think later FIXME + time_t start_time = time(NULL); + double runningWallTime = 0; + bool hittedWallTime = false; + bool hittedMaxTries = false; + + // spParamsP tmpParam; + // std::vector popParams(1); + // const int sp_per_period = 5000; + + // popParams.reserve(sp_per_period); + // Genotypes.reserve(sp_per_period); + + // std::vectormutablePos(numGenes); // could be inside getMuatedPos_bitset + + + // // multimap to hold nextMutationTime + // std::multimap mapTimes; + // //std::multimap::iterator m1pos; + + + // // count troublesome tis + // int ti_dbl_min = 0; + // int ti_e3 = 0; + + + + // // Beerenwinkel + // double adjust_fitness_B = -std::numeric_limits::infinity(); + // //McFarland + // double adjust_fitness_MF = -std::numeric_limits::infinity(); + + // double e1, n_0; //, n_1; // for McFarland error + // double tps_0, tps_1; // for McFarland error + // tps_0 = 0.0; + // tps_1 = 0.0; + // e1 = 0.0; + // n_0 = 0.0; + // n_1 = 0.0; + double em1, em1sc; // new computation of McFarland error + em1 = 0.0; + em1sc = 0.0; + + // // For totPopSize_and_fill and bailing out + // // should be static vars inside funct, + // // but they keep value over calls in same R session. + // int lastMaxDr = 0; + // double done_at = -9; + // // totalPopSize at time t, at t-1 and the max error. + + // 5.1 Initialize + + int numRuns = 0; + bool forceRerun = false; + + double currentTime = 0; + int iter = 0; + while(runAgain) { + + // Initialize a bunch of things + +// #ifdef MIN_RATIO_MUTS +// g_min_birth_mut_ratio = DBL_MAX; +// g_min_death_mut_ratio = DBL_MAX; +// g_tmp1 = DBL_MAX; +// #endif + + + // untilcancer goes here + + + + + //tmpParam is a temporary holder. + // init_tmpP(tmpParam); + // init_tmpP(popParams[0]); + + // lastStoredSample = 0.0; + // Genotypes[0].reset(); + // popParams[0].popSize = initSize; + // totPopSize = initSize; + + // tps_0 = totPopSize; + // e1 = 0.0; + // tps_1 = totPopSize; + + + + try { + // it is CRUCIAL that several entries are zeroed (or -1) at the + // start of innerBNB now that we do multiple runs if onlyCancer = true. + innerBNB( + numGenes, + initSize, + K, + // alpha, + typeCBN, + // genTime, + typeFitness, + mutationPropGrowth, + mu, + sh, + s, + death, + // birthRate, + keepEvery, + sampleEvery, + numDrivers, + initMutant, + start_time, + maxWallTime, + finalTime, + detectionSize, + //endTimeEvery, + detectionDrivers, + minDetectDrvCloneSz, + extraTime, + verbosity, + totPopSize, + em1, + em1sc, + // n_0, + // n_1, + // em1, + ratioForce, + currentTime, + speciesFS, + outNS_i, + iter, + genot_out, + popSizes_out, + index_out, + time_out, + sampleTotPopSize, + sampleLargestPopSize, + sampleMaxNDr, + sampleNDrLargestPop, + reachDetection, + ran_generator, + runningWallTime, + hittedWallTime, + restrictTable); + ++numRuns; + forceRerun = false; + } catch (rerunExcept &e) { + Rcpp::Rcout << "\n Exception " << e.what() + << ". Rerunning."; + forceRerun = true; + } catch (const std::exception &e) { + Rcpp::Rcout << "\n Unrecoverable exception: " << e.what() + << ". Aborting. \n"; + return + Rcpp::List::create(Named("other") = + Rcpp::List::create(Named("UnrecoverExcept") = true, + Named("ExceptionMessage") = e.what())); + } catch (...) { + Rcpp::Rcout << "\n Unknown unrecoverable exception. Aborting. \n"; + return + Rcpp::List::create(Named("other") = + Rcpp::List::create(Named("UnrecoverExcept") = true, + Named("ExceptionMessage") = "Unknown exception")); + } + + + if(hittedWallTime) { + Rcpp::Rcout << "\n Hitted wall time. Exiting."; + runAgain = false; + if(errorHitWallTime) { + Rcpp::Rcout << "\n Hitting wall time is regarded as an error. \n"; + return + Rcpp::List::create(Named("HittedWallTime") = true, + Named("HittedMaxTries") = false, // yes, for + // coherent return + // objects + Named("other") = + Rcpp::List::create(Named("UnrecoverExcept") = false)); + } + } else if(numRuns > maxNumTries) { + // hittedMaxTries + hittedMaxTries = true; + Rcpp::Rcout << "\n Hitted maxtries. Exiting."; + runAgain = false; + if(errorHitMaxTries) { + Rcpp::Rcout << "\n Hitting max tries is regarded as an error. \n"; + return + Rcpp::List::create(Named("HittedWallTime") = false, + Named("HittedMaxTries") = true, + Named("other") = + Rcpp::List::create(Named("UnrecoverExcept") = false)); + } + } else if(forceRerun) { + runAgain = true; + forceRerun = false; + } else { + if(onlyCancer) { + runAgain = !reachDetection; + } else { + runAgain = false; + } + } +#ifdef DEBUGV + Rcpp::Rcout << "\n reachDetection = " << reachDetection; + Rcpp::Rcout << "\n forceRerun = " << forceRerun << "\n"; + +#endif + + } // runAgain loop + // FIXME: zz + // untilcancer + // inner loop ends above + // The return objects only created if needed + + + // If we hit wallTime, we can get done without going through + // totPopSize.... Problem if sampling at end + // if ( hittedWallTime ) { + // // hitted wall time. So we need to sample at the very end. + // Nope! Just ensure if hittedWallTime you always sample properly! + // } + + + + // FIXME: do I want to move this right after out_crude + // and do it incrementally? I'd have also a counter of total unique species + + // here("right after simuls done"); + + // FIXME: all this is ugly and could be a single function + // up to call to IntegerMatrix + std::set uniqueGenotypes; + std::vector genot_out_ullong(genot_out.size()); + genot_out_to_ullong(genot_out_ullong, genot_out); + find_unique_genotypes(uniqueGenotypes, genot_out_ullong); + std::vector uniqueGenotypes_vector(uniqueGenotypes.size()); + uniqueGenotypes_to_vector(uniqueGenotypes_vector, uniqueGenotypes); + // IntegerMatrix returnGenotypes(uniqueGenotypes_vector.size(), numGenes); + Rcpp::IntegerMatrix returnGenotypes(numGenes, uniqueGenotypes_vector.size()); + // here("after creating returnGenotypes"); + create_returnGenotypes(returnGenotypes, numGenes, uniqueGenotypes_vector); + // here("after call to create_returnGenotypes_to_vector"); + + // The out.ns in R code; holder of popSizes over time + // The first row is time, then the genotypes (in column major) + // here("after uniqueGenotypes_to_vector"); + + + int outNS_r, outNS_c, create_outNS; + if( ( (uniqueGenotypes.size() + 1) * (outNS_i + 1) ) > ( pow(2, 31) - 1 ) ) { + Rcpp::Rcout << "\nWARNING: Return outNS object > 2^31 - 1. Not created.\n"; + outNS_r = 1; + outNS_c = 1; + create_outNS = 0; + } else if ( + static_cast((uniqueGenotypes.size()+1) * (outNS_i+1)) * 8 > + (maxram * (1024*1024) ) ) { + Rcpp::Rcout << "\nWARNING: Return outNS object > maxram. Not created.\n"; + outNS_r = 1; + outNS_c = 1; + create_outNS = 0; + } else { + outNS_r = outNS_i + 1; + outNS_c = uniqueGenotypes.size() + 1; + create_outNS = 1; + } + Rcpp::NumericMatrix outNS(outNS_r, outNS_c); + if(create_outNS) { + reshape_to_outNS(outNS, uniqueGenotypes_vector, genot_out_ullong, + popSizes_out, + index_out, time_out); + + } else { + outNS(0, 0) = -99; + } + + int maxNumDrivers = 0; + int totalPresentDrivers = 0; + std::vectorcountByDriver(numDrivers, 0); + std::string occurringDrivers; + + + // here("before count_NumDrivers"); + // IntegerVector totDrivers(returnGenotypes.ncol()); + // count_NumDrivers(maxNumDrivers, returnGenotypes, numDrivers, + // totDrivers); + count_NumDrivers(maxNumDrivers, countByDriver, + returnGenotypes, numDrivers); + + whichDrivers(totalPresentDrivers, occurringDrivers, countByDriver); + + std::vector sampleLargestPopProp(outNS_i + 1); + + if((outNS_i + 1) != static_cast(sampleLargestPopSize.size())) + throw std::length_error("outNS_i + 1 != sampleLargestPopSize.size"); + std::transform(sampleLargestPopSize.begin(), sampleLargestPopSize.end(), + sampleTotPopSize.begin(), + sampleLargestPopProp.begin(), + std::divides()); + + Rcpp::NumericMatrix perSampleStats(outNS_i + 1, 5); + fill_SStats(perSampleStats, sampleTotPopSize, sampleLargestPopSize, + sampleLargestPopProp, sampleMaxNDr, sampleNDrLargestPop); + + // error in mcfarland's + // if((typeFitness == "mcfarland0") || (typeFitness == "mcfarlandlog")) + // e1r = log(e1); + // if(typeFitness == "mcfarland") + // e1r = (1.0/K) * e1; + + // here("before return"); + + // // // debuggin: precompute things + // DP2(simulsDone); + // DP2(maxWallTime); + // DP2(hittedWallTime); + // DP2(outNS_i); + // DP2( sampleMaxNDr[outNS_i]); + // DP2(sampleNDrLargestPop[outNS_i]); + // DP2(sampleLargestPopSize[outNS_i]); + // DP2(sampleLargestPopProp[outNS_i]); + // DP2((runningWallTime > maxWallTime)); + // here("after precomp"); + // here("*******************************************"); + + + return + Rcpp::List::create(Named("pops.by.time") = outNS, + Named("NumClones") = uniqueGenotypes.size(), + Named("TotalPopSize") = totPopSize, + Named("Genotypes") = returnGenotypes, + Named("MaxNumDrivers") = maxNumDrivers, + // Named("MaxDrivers_PerSample") = wrap(sampleMaxNDr), + // Named("NumDriversLargestPop_PerSample") = sampleNDrLargestPop, + // Named("TotPopSize_PerSample") = sampleTotPopSize, + // Named("LargestPopSize_PerSample") = sampleLargestPopSize, + // Named("PropLargestPopSize_PerSample") = sampleLargestPopProp, + Named("MaxDriversLast") = sampleMaxNDr[outNS_i], + Named("NumDriversLargestPop") = sampleNDrLargestPop[outNS_i], + Named("LargestClone") = sampleLargestPopSize[outNS_i], + Named("PropLargestPopLast") = sampleLargestPopProp[outNS_i], + // Named("totDrivers") = totDrivers, + Named("FinalTime") = currentTime, + Named("NumIter") = iter, + // Named("outi") = outNS_i + 1, // silly. Use the real number of samples. FIXME + Named("HittedWallTime") = hittedWallTime, // (runningWallTime > maxWallTime), + Named("HittedMaxTries") = hittedMaxTries, + // Named("iRunningWallTime") = runningWallTime, + // Named("oRunningWallTime") = difftime(time(NULL), start_time), + // Named("ti_dbl_min") = ti_dbl_min, + // Named("ti_e3") = ti_e3, + Named("TotalPresentDrivers") = totalPresentDrivers, + Named("CountByDriver") = countByDriver, + // FIXME: OccurringDrivers underestimates true occurring + // drivers if keepEvery < 0, so we only return the last. + Named("OccurringDrivers") = occurringDrivers, + Named("PerSampleStats") = perSampleStats, + Named("other") = Rcpp::List::create(Named("attemptsUsed") = numRuns, + Named("errorMF") = + returnMFE_new(em1sc, typeFitness), + Named("errorMF_size") = + returnMFE_new(em1, typeFitness), // Used to be e1, not log + // Named("errorMF_n_0") = n_0, +#ifdef MIN_RATIO_MUTS + Named("minDMratio") = + g_min_death_mut_ratio, + Named("minBMratio") = + g_min_birth_mut_ratio, +#else + Named("minDMratio") = -99, + Named("minBMratio") = -99, +#endif + // Named("errorMF_n_1") = n_1, + Named("UnrecoverExcept") = false) + ); + + // END_RCPP + + } + +// FIXME: it would have been nice to have OccuringDrivers for last sampling + +// Count by driver: count the number of time each driver has appeared. Simply count +// over genotypes, so a very crude measure. + +// OccurringDrivers: which drivers have ever appeared (might have disappeared) + +// TotalPresentDrivers: how many drivers have ever appeared. If a driver appears once, +// at one sampling period, it is counted. How is this different +// from MaxNumDrivers? This is the individual drivers. + +// MaxNumDrivers: what is the largest number of drivers in any genotype, ever. +// Note that that genotype might have disappeared. How is this different +// from TotalPresentDrivers? This is the max drivers in a genotype! + +// MaxDriversLast: the largest number of drivers in any of the genotypes +// that exist at the last sample. + +// NumDriversLargestPopLast: the number of drivers in the population with +// largest pop. size at the last sample. + +// The above are also parte of the matrix "PerSampleStats" + +// PerSampleStats: five columns, with one entry per sample +// (and you get the time from pops.by.time, the first column) +// - Total Population Size +// - Pop size of the population with largest size +// - Ratio of the previous two +// - the largest number of drivers in any of the genotypes +// that exist in that sample (i.e., the per-sample equivalent +// of MaxDriversLast or MaxNumDrivers) +// - the number of drivers in the population with +// largest pop. size (i.e., the per-sample equivalent of +// NumDriversLargestPopLast) + + + + +// ******************************************************** +// ******************************************************** + +// timeLastUpdate, sampling, etc. + +//timeLastUpdate only used on the right (i.e. its value is used for +// something in): +// 5.4 (call to Algo3, update the mutated) +// 5.6 (call to Algo2, update a previous sp. to which it mutates) +// 5.9 (sampling) + +// timeLastUpdate ONLY updated on 5.2, when flagged ones are updated. +// (I add a few other places, to catch errors) + +// Any species which are modified (5.4 Algo3 in step 5.7, 5.6 via Algo2, +// or created new) will be flagged + +// When we sample, all have been unflagged as we unflag all flagged in 5.2 + + +// My former confusion might have come from trying to sample by +// iteration, not time. + +// ******************************************************** +// ******************************************************** + + + +// ************** Forced sampling idea ************************ + +// I was doing it wrong. I was forcing sampling, and jumpint to next +// sampling time, but possibly a minNextMutationTime was smaller! +// Now I do it right, but requires ugly hacks. +// Note initial part of 5.2: tSample is set to currentTime +// This is done after a previous asignment to tSample. I force +// getting mutations time, and proceeding pretending we are +// doing the usual thing, but then set tSample to the previous +// currentTime, which unless ti = 0, will make us enter sampling. +// An ugly hack anyway. Just a way of forcing to go to sampling. + +// Alternatively, we could set tSample to currentTime, and not go +// through updateint nextMutation or finding the minNextMutationTime +// This would be slightly faster. But breaks logic of algorithm and +// forces using a &&!forceSample in the test to not sample. + +// Current approach would also allow to decreasing sampling time +// at time progresses, respecting logic of algorithm. + +// Forced sampling, however, unlikely to avoid getting stuck except +// as a way of early sampling for large pop. Remember +// than at the sampling, at least two pops will have a size >= 1. +// +// Note: I do not got through Algo2 in 5.7, because I check t == 0.0. +// But warning if enter Algo2 with t == 0 anyway. + + + +// ALTERNATIVES FOR 5.6 +// The following are two alternative approaches +// which do one fewer comparison. Harder to understand, though{ +// V1 +// s = 0; +// k = 0; + +// for( ; ; ){ +// if( k == numGenes) { //pre-existing species +// break; +// } +// else { +// if(newGenotype[k] == Genotypes(k, s)) k++; +// else { +// s++; +// if(numSpecies >= s) { //create new species +// break; +// } else { +// k = 0; +// } +// } +// } +// } +// V2 +// s = 0; +// for(;;) { +// k = 0; +// while ( newGenotype[k] == Genotypes(k, s) ) { +// k++; +// if(k == numGenes ) { +// // s is same as new genotype +// // Thus: a mutation to a pre-existing species +// // break +// } +// } +// s++; +// if( s == numSpecies ) { +// // create new species +// // break +// } +// } +// } + + + + +// suppose this tree +// 1 -> 2 +// 5 -> 4 -> 3 +// 6 + +// grandparente has 1 and 2 +// father has 1, 2, 3: thus gets same fitness as grandpa +// child has 1, 2, 3, 4: has 4 has unment, it gets same fitness as father, and thus as +// grandpa, but it should get the fitness of three drivers, since +// 4 meets requirement of 3. +// Now grandchild gets 5. here things get fixed. + +// And the same if unment. +// Suppose grandparen gets 1, 2, 3. +// If father now gets 6, it counts as four drivers, but it really only has +// three with the restrictions met. diff --git a/OncoSimulR/src-i386/Makevars b/OncoSimulR/src-i386/Makevars new file mode 100644 index 00000000..33c069f0 --- /dev/null +++ b/OncoSimulR/src-i386/Makevars @@ -0,0 +1,3 @@ +## This is a C++11 package +CXX_STD = CXX11 + diff --git a/OncoSimulR/src-i386/Makevars.win b/OncoSimulR/src-i386/Makevars.win new file mode 100644 index 00000000..33c069f0 --- /dev/null +++ b/OncoSimulR/src-i386/Makevars.win @@ -0,0 +1,3 @@ +## This is a C++11 package +CXX_STD = CXX11 + diff --git a/OncoSimulR/src-i386/OncoSimulR_init.c b/OncoSimulR/src-i386/OncoSimulR_init.c new file mode 100644 index 00000000..e18f4505 --- /dev/null +++ b/OncoSimulR/src-i386/OncoSimulR_init.c @@ -0,0 +1,46 @@ +// This file was automatically generated by 'Kmisc::registerFunctions()' + +#include +#include + +#include + +SEXP OncoSimulR_nr_BNB_Algo5(SEXP rFESEXP, SEXP mu_SEXP, SEXP deathSEXP, SEXP initSizeSEXP, SEXP sampleEverySEXP, SEXP detectionSizeSEXP, SEXP finalTimeSEXP, SEXP initSpSEXP, SEXP initItSEXP, SEXP seedSEXP, SEXP verbositySEXP, SEXP speciesFSSEXP, SEXP ratioForceSEXP, SEXP typeFitness_SEXP, SEXP maxramSEXP, SEXP mutationPropGrowthSEXP, SEXP initMutant_SEXP, SEXP maxWallTimeSEXP, SEXP keepEverySEXP, SEXP KSEXP, SEXP detectionDriversSEXP, SEXP onlyCancerSEXP, SEXP errorHitWallTimeSEXP, SEXP maxNumTriesSEXP, SEXP errorHitMaxTriesSEXP, SEXP minDetectDrvCloneSzSEXP, SEXP extraTimeSEXP, SEXP keepPhylogSEXP, SEXP MMUEFSEXP, SEXP full2mutator_SEXP, SEXP n2SEXP, SEXP p2SEXP, SEXP PDBaselineSEXP, SEXP cPDetect_iSEXP, SEXP checkSizePEverySEXP, SEXP AND_DrvProbExitSEXP, SEXP fixation_listSEXP); +SEXP OncoSimulR_BNB_Algo5(SEXP restrictTableSEXP, SEXP numDriversSEXP, SEXP numGenesSEXP, SEXP typeCBN_SEXP, SEXP sSEXP, SEXP deathSEXP, SEXP muSEXP, SEXP initSizeSEXP, SEXP sampleEverySEXP, SEXP detectionSizeSEXP, SEXP finalTimeSEXP, SEXP initSpSEXP, SEXP initItSEXP, SEXP seedSEXP, SEXP verbositySEXP, SEXP speciesFSSEXP, SEXP ratioForceSEXP, SEXP typeFitness_SEXP, SEXP maxramSEXP, SEXP mutationPropGrowthSEXP, SEXP initMutantSEXP, SEXP maxWallTimeSEXP, SEXP keepEverySEXP, SEXP shSEXP, SEXP KSEXP, SEXP detectionDriversSEXP, SEXP onlyCancerSEXP, SEXP errorHitWallTimeSEXP, SEXP maxNumTriesSEXP, SEXP errorHitMaxTriesSEXP, SEXP minDetectDrvCloneSzSEXP, SEXP extraTimeSEXP); +SEXP OncoSimulR_evalRGenotype(SEXP rGSEXP, SEXP rFESEXP, SEXP verboseSEXP, SEXP prodNegSEXP, SEXP calledBy_SEXP); +SEXP OncoSimulR_evalRGenotypeAndMut(SEXP rGSEXP, SEXP rFESEXP, SEXP muEFSEXP, SEXP full2mutator_SEXP, SEXP verboseSEXP, SEXP prodNegSEXP); +// SEXP OncoSimulR_readFitnessEffects(SEXP rFESEXP, SEXP echoSEXP); +SEXP OncoSimulR_accessibleGenotypes(SEXP ySEXP, SEXP xSEXP, SEXP numMutSEXP, SEXP thSEXP); + +SEXP OncoSimulR_genot2AdjMat(SEXP ySEXP, SEXP xSEXP, SEXP numMutSEXP); +SEXP OncoSimulR_peaksLandscape(SEXP ySEXP, SEXP xSEXP, SEXP numMutSEXP, SEXP thSEXP); +SEXP OncoSimulR_accessibleGenotypes_former(SEXP ySEXP, SEXP xSEXP, SEXP numMutSEXP, SEXP thSEXP); + +// The number is the number of arguments +R_CallMethodDef callMethods[] = { + {"OncoSimulR_nr_BNB_Algo5", (DL_FUNC) &OncoSimulR_nr_BNB_Algo5, 37}, + {"OncoSimulR_BNB_Algo5", (DL_FUNC) &OncoSimulR_BNB_Algo5, 32}, + {"OncoSimulR_evalRGenotype", (DL_FUNC) &OncoSimulR_evalRGenotype, 5}, + {"OncoSimulR_evalRGenotypeAndMut", (DL_FUNC) &OncoSimulR_evalRGenotypeAndMut, 6}, + {"OncoSimulR_accessibleGenotypes", (DL_FUNC) &OncoSimulR_accessibleGenotypes, 4}, + {"OncoSimulR_genot2AdjMat", (DL_FUNC) &OncoSimulR_genot2AdjMat, 3}, + {"OncoSimulR_peaksLandscape", (DL_FUNC) &OncoSimulR_peaksLandscape, 4}, + {"OncoSimulR_accessibleGenotypes_former", (DL_FUNC) &OncoSimulR_accessibleGenotypes_former, 4}, + // {"OncoSimulR_readFitnessEffects", (DL_FUNC) &OncoSimulR_readFitnessEffects, 2}, + {NULL, NULL, 0} +}; + +void R_init_OncoSimulR(DllInfo *info) { + R_registerRoutines(info, NULL, callMethods, NULL, NULL); + R_useDynamicSymbols(info, FALSE); +} + + + + + + + + + + diff --git a/OncoSimulR/src-i386/RcppExports.cpp b/OncoSimulR/src-i386/RcppExports.cpp new file mode 100644 index 00000000..ac04e493 --- /dev/null +++ b/OncoSimulR/src-i386/RcppExports.cpp @@ -0,0 +1,200 @@ +// This file was generated by Rcpp::compileAttributes +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#include + +using namespace Rcpp; + +// nr_BNB_Algo5 +Rcpp::List nr_BNB_Algo5(Rcpp::List rFE, Rcpp::NumericVector mu_, double death, double initSize, double sampleEvery, double detectionSize, double finalTime, int initSp, int initIt, double seed, int verbosity, int speciesFS, double ratioForce, Rcpp::CharacterVector typeFitness_, int maxram, int mutationPropGrowth, Rcpp::IntegerVector initMutant_, double maxWallTime, double keepEvery, double K, int detectionDrivers, bool onlyCancer, bool errorHitWallTime, int maxNumTries, bool errorHitMaxTries, double minDetectDrvCloneSz, double extraTime, bool keepPhylog, Rcpp::List MMUEF, Rcpp::IntegerVector full2mutator_, double n2, double p2, double PDBaseline, double cPDetect_i, double checkSizePEvery, bool AND_DrvProbExit, Rcpp::List fixation_list); +RcppExport SEXP OncoSimulR_nr_BNB_Algo5(SEXP rFESEXP, SEXP mu_SEXP, SEXP deathSEXP, SEXP initSizeSEXP, SEXP sampleEverySEXP, SEXP detectionSizeSEXP, SEXP finalTimeSEXP, SEXP initSpSEXP, SEXP initItSEXP, SEXP seedSEXP, SEXP verbositySEXP, SEXP speciesFSSEXP, SEXP ratioForceSEXP, SEXP typeFitness_SEXP, SEXP maxramSEXP, SEXP mutationPropGrowthSEXP, SEXP initMutant_SEXP, SEXP maxWallTimeSEXP, SEXP keepEverySEXP, SEXP KSEXP, SEXP detectionDriversSEXP, SEXP onlyCancerSEXP, SEXP errorHitWallTimeSEXP, SEXP maxNumTriesSEXP, SEXP errorHitMaxTriesSEXP, SEXP minDetectDrvCloneSzSEXP, SEXP extraTimeSEXP, SEXP keepPhylogSEXP, SEXP MMUEFSEXP, SEXP full2mutator_SEXP, SEXP n2SEXP, SEXP p2SEXP, SEXP PDBaselineSEXP, SEXP cPDetect_iSEXP, SEXP checkSizePEverySEXP, SEXP AND_DrvProbExitSEXP, SEXP fixation_listSEXP) { +BEGIN_RCPP + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; + Rcpp::traits::input_parameter< Rcpp::List >::type rFE(rFESEXP); + Rcpp::traits::input_parameter< Rcpp::NumericVector >::type mu_(mu_SEXP); + Rcpp::traits::input_parameter< double >::type death(deathSEXP); + Rcpp::traits::input_parameter< double >::type initSize(initSizeSEXP); + Rcpp::traits::input_parameter< double >::type sampleEvery(sampleEverySEXP); + Rcpp::traits::input_parameter< double >::type detectionSize(detectionSizeSEXP); + Rcpp::traits::input_parameter< double >::type finalTime(finalTimeSEXP); + Rcpp::traits::input_parameter< int >::type initSp(initSpSEXP); + Rcpp::traits::input_parameter< int >::type initIt(initItSEXP); + Rcpp::traits::input_parameter< double >::type seed(seedSEXP); + Rcpp::traits::input_parameter< int >::type verbosity(verbositySEXP); + Rcpp::traits::input_parameter< int >::type speciesFS(speciesFSSEXP); + Rcpp::traits::input_parameter< double >::type ratioForce(ratioForceSEXP); + Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type typeFitness_(typeFitness_SEXP); + Rcpp::traits::input_parameter< int >::type maxram(maxramSEXP); + Rcpp::traits::input_parameter< int >::type mutationPropGrowth(mutationPropGrowthSEXP); + Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type initMutant_(initMutant_SEXP); + Rcpp::traits::input_parameter< double >::type maxWallTime(maxWallTimeSEXP); + Rcpp::traits::input_parameter< double >::type keepEvery(keepEverySEXP); + Rcpp::traits::input_parameter< double >::type K(KSEXP); + Rcpp::traits::input_parameter< int >::type detectionDrivers(detectionDriversSEXP); + Rcpp::traits::input_parameter< bool >::type onlyCancer(onlyCancerSEXP); + Rcpp::traits::input_parameter< bool >::type errorHitWallTime(errorHitWallTimeSEXP); + Rcpp::traits::input_parameter< int >::type maxNumTries(maxNumTriesSEXP); + Rcpp::traits::input_parameter< bool >::type errorHitMaxTries(errorHitMaxTriesSEXP); + Rcpp::traits::input_parameter< double >::type minDetectDrvCloneSz(minDetectDrvCloneSzSEXP); + Rcpp::traits::input_parameter< double >::type extraTime(extraTimeSEXP); + Rcpp::traits::input_parameter< bool >::type keepPhylog(keepPhylogSEXP); + Rcpp::traits::input_parameter< Rcpp::List >::type MMUEF(MMUEFSEXP); + Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type full2mutator_(full2mutator_SEXP); + Rcpp::traits::input_parameter< double >::type n2(n2SEXP); + Rcpp::traits::input_parameter< double >::type p2(p2SEXP); + Rcpp::traits::input_parameter< double >::type PDBaseline(PDBaselineSEXP); + Rcpp::traits::input_parameter< double >::type cPDetect_i(cPDetect_iSEXP); + Rcpp::traits::input_parameter< double >::type checkSizePEvery(checkSizePEverySEXP); + Rcpp::traits::input_parameter< bool >::type AND_DrvProbExit(AND_DrvProbExitSEXP); + Rcpp::traits::input_parameter< Rcpp::List >::type fixation_list(fixation_listSEXP); + __result = Rcpp::wrap(nr_BNB_Algo5(rFE, mu_, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_, n2, p2, PDBaseline, cPDetect_i, checkSizePEvery, AND_DrvProbExit, fixation_list)); + return __result; +END_RCPP +} +// BNB_Algo5 +Rcpp::List BNB_Algo5(Rcpp::IntegerMatrix restrictTable, int numDrivers, int numGenes, Rcpp::CharacterVector typeCBN_, double s, double death, double mu, double initSize, double sampleEvery, double detectionSize, double finalTime, int initSp, int initIt, int seed, int verbosity, int speciesFS, double ratioForce, Rcpp::CharacterVector typeFitness_, int maxram, int mutationPropGrowth, int initMutant, double maxWallTime, double keepEvery, double sh, double K, int detectionDrivers, bool onlyCancer, bool errorHitWallTime, int maxNumTries, bool errorHitMaxTries, double minDetectDrvCloneSz, double extraTime); +RcppExport SEXP OncoSimulR_BNB_Algo5(SEXP restrictTableSEXP, SEXP numDriversSEXP, SEXP numGenesSEXP, SEXP typeCBN_SEXP, SEXP sSEXP, SEXP deathSEXP, SEXP muSEXP, SEXP initSizeSEXP, SEXP sampleEverySEXP, SEXP detectionSizeSEXP, SEXP finalTimeSEXP, SEXP initSpSEXP, SEXP initItSEXP, SEXP seedSEXP, SEXP verbositySEXP, SEXP speciesFSSEXP, SEXP ratioForceSEXP, SEXP typeFitness_SEXP, SEXP maxramSEXP, SEXP mutationPropGrowthSEXP, SEXP initMutantSEXP, SEXP maxWallTimeSEXP, SEXP keepEverySEXP, SEXP shSEXP, SEXP KSEXP, SEXP detectionDriversSEXP, SEXP onlyCancerSEXP, SEXP errorHitWallTimeSEXP, SEXP maxNumTriesSEXP, SEXP errorHitMaxTriesSEXP, SEXP minDetectDrvCloneSzSEXP, SEXP extraTimeSEXP) { +BEGIN_RCPP + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; + Rcpp::traits::input_parameter< Rcpp::IntegerMatrix >::type restrictTable(restrictTableSEXP); + Rcpp::traits::input_parameter< int >::type numDrivers(numDriversSEXP); + Rcpp::traits::input_parameter< int >::type numGenes(numGenesSEXP); + Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type typeCBN_(typeCBN_SEXP); + Rcpp::traits::input_parameter< double >::type s(sSEXP); + Rcpp::traits::input_parameter< double >::type death(deathSEXP); + Rcpp::traits::input_parameter< double >::type mu(muSEXP); + Rcpp::traits::input_parameter< double >::type initSize(initSizeSEXP); + Rcpp::traits::input_parameter< double >::type sampleEvery(sampleEverySEXP); + Rcpp::traits::input_parameter< double >::type detectionSize(detectionSizeSEXP); + Rcpp::traits::input_parameter< double >::type finalTime(finalTimeSEXP); + Rcpp::traits::input_parameter< int >::type initSp(initSpSEXP); + Rcpp::traits::input_parameter< int >::type initIt(initItSEXP); + Rcpp::traits::input_parameter< int >::type seed(seedSEXP); + Rcpp::traits::input_parameter< int >::type verbosity(verbositySEXP); + Rcpp::traits::input_parameter< int >::type speciesFS(speciesFSSEXP); + Rcpp::traits::input_parameter< double >::type ratioForce(ratioForceSEXP); + Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type typeFitness_(typeFitness_SEXP); + Rcpp::traits::input_parameter< int >::type maxram(maxramSEXP); + Rcpp::traits::input_parameter< int >::type mutationPropGrowth(mutationPropGrowthSEXP); + Rcpp::traits::input_parameter< int >::type initMutant(initMutantSEXP); + Rcpp::traits::input_parameter< double >::type maxWallTime(maxWallTimeSEXP); + Rcpp::traits::input_parameter< double >::type keepEvery(keepEverySEXP); + Rcpp::traits::input_parameter< double >::type sh(shSEXP); + Rcpp::traits::input_parameter< double >::type K(KSEXP); + Rcpp::traits::input_parameter< int >::type detectionDrivers(detectionDriversSEXP); + Rcpp::traits::input_parameter< bool >::type onlyCancer(onlyCancerSEXP); + Rcpp::traits::input_parameter< bool >::type errorHitWallTime(errorHitWallTimeSEXP); + Rcpp::traits::input_parameter< int >::type maxNumTries(maxNumTriesSEXP); + Rcpp::traits::input_parameter< bool >::type errorHitMaxTries(errorHitMaxTriesSEXP); + Rcpp::traits::input_parameter< double >::type minDetectDrvCloneSz(minDetectDrvCloneSzSEXP); + Rcpp::traits::input_parameter< double >::type extraTime(extraTimeSEXP); + __result = Rcpp::wrap(BNB_Algo5(restrictTable, numDrivers, numGenes, typeCBN_, s, death, mu, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant, maxWallTime, keepEvery, sh, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime)); + return __result; +END_RCPP +} +// evalRGenotype +double evalRGenotype(Rcpp::IntegerVector rG, Rcpp::List rFE, bool verbose, bool prodNeg, Rcpp::CharacterVector calledBy_); +RcppExport SEXP OncoSimulR_evalRGenotype(SEXP rGSEXP, SEXP rFESEXP, SEXP verboseSEXP, SEXP prodNegSEXP, SEXP calledBy_SEXP) { +BEGIN_RCPP + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; + Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type rG(rGSEXP); + Rcpp::traits::input_parameter< Rcpp::List >::type rFE(rFESEXP); + Rcpp::traits::input_parameter< bool >::type verbose(verboseSEXP); + Rcpp::traits::input_parameter< bool >::type prodNeg(prodNegSEXP); + Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type calledBy_(calledBy_SEXP); + __result = Rcpp::wrap(evalRGenotype(rG, rFE, verbose, prodNeg, calledBy_)); + return __result; +END_RCPP +} +// evalRGenotypeAndMut +Rcpp::NumericVector evalRGenotypeAndMut(Rcpp::IntegerVector rG, Rcpp::List rFE, Rcpp::List muEF, Rcpp::IntegerVector full2mutator_, bool verbose, bool prodNeg); +RcppExport SEXP OncoSimulR_evalRGenotypeAndMut(SEXP rGSEXP, SEXP rFESEXP, SEXP muEFSEXP, SEXP full2mutator_SEXP, SEXP verboseSEXP, SEXP prodNegSEXP) { +BEGIN_RCPP + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; + Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type rG(rGSEXP); + Rcpp::traits::input_parameter< Rcpp::List >::type rFE(rFESEXP); + Rcpp::traits::input_parameter< Rcpp::List >::type muEF(muEFSEXP); + Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type full2mutator_(full2mutator_SEXP); + Rcpp::traits::input_parameter< bool >::type verbose(verboseSEXP); + Rcpp::traits::input_parameter< bool >::type prodNeg(prodNegSEXP); + __result = Rcpp::wrap(evalRGenotypeAndMut(rG, rFE, muEF, full2mutator_, verbose, prodNeg)); + return __result; +END_RCPP +} + +// evalRGenotypeAndMut +Rcpp::IntegerVector accessibleGenotypes(Rcpp::IntegerMatrix y, Rcpp::NumericVector f, Rcpp::IntegerVector numMut, double th); +RcppExport SEXP OncoSimulR_accessibleGenotypes(SEXP ySEXP, SEXP fSEXP, SEXP numMutSEXP, SEXP thSEXP) { +BEGIN_RCPP + Rcpp::RObject __result; +// Rcpp::RNGScope __rngScope; + Rcpp::traits::input_parameter< Rcpp::IntegerMatrix >::type y(ySEXP); + Rcpp::traits::input_parameter< Rcpp::NumericVector >::type f(fSEXP); + Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type numMut(numMutSEXP); + Rcpp::traits::input_parameter< double >::type th(thSEXP); + __result = Rcpp::wrap(accessibleGenotypes(y, f, numMut, th)); + return __result; + END_RCPP +} + +// genotype fitness matrix to adjacency matrix of genotypes +Rcpp::NumericMatrix genot2AdjMat(Rcpp::IntegerMatrix y, Rcpp::NumericVector f, Rcpp::IntegerVector numMut); +RcppExport SEXP OncoSimulR_genot2AdjMat(SEXP ySEXP, SEXP fSEXP, SEXP numMutSEXP) { +BEGIN_RCPP + Rcpp::RObject __result; +// Rcpp::RNGScope __rngScope; + Rcpp::traits::input_parameter< Rcpp::IntegerMatrix >::type y(ySEXP); + Rcpp::traits::input_parameter< Rcpp::NumericVector >::type f(fSEXP); + Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type numMut(numMutSEXP); + __result = Rcpp::wrap(genot2AdjMat(y, f, numMut)); + return __result; + END_RCPP +} + + +// evalRGenotypeAndMut +Rcpp::IntegerVector peaksLandscape(Rcpp::IntegerMatrix y, Rcpp::NumericVector f, Rcpp::IntegerVector numMut, double th); +RcppExport SEXP OncoSimulR_peaksLandscape(SEXP ySEXP, SEXP fSEXP, SEXP numMutSEXP, SEXP thSEXP) { +BEGIN_RCPP + Rcpp::RObject __result; +// Rcpp::RNGScope __rngScope; + Rcpp::traits::input_parameter< Rcpp::IntegerMatrix >::type y(ySEXP); + Rcpp::traits::input_parameter< Rcpp::NumericVector >::type f(fSEXP); + Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type numMut(numMutSEXP); + Rcpp::traits::input_parameter< double >::type th(thSEXP); + __result = Rcpp::wrap(peaksLandscape(y, f, numMut, th)); + return __result; + END_RCPP +} + +// just for testing. Eventually remove +Rcpp::IntegerVector accessibleGenotypes_former(Rcpp::IntegerMatrix y, Rcpp::NumericVector f, Rcpp::IntegerVector numMut, double th); +RcppExport SEXP OncoSimulR_accessibleGenotypes_former(SEXP ySEXP, SEXP fSEXP, SEXP numMutSEXP, SEXP thSEXP) { +BEGIN_RCPP + Rcpp::RObject __result; +// Rcpp::RNGScope __rngScope; + Rcpp::traits::input_parameter< Rcpp::IntegerMatrix >::type y(ySEXP); + Rcpp::traits::input_parameter< Rcpp::NumericVector >::type f(fSEXP); + Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type numMut(numMutSEXP); + Rcpp::traits::input_parameter< double >::type th(thSEXP); + __result = Rcpp::wrap(accessibleGenotypes_former(y, f, numMut, th)); + return __result; + END_RCPP +} + + +// // readFitnessEffects +// void readFitnessEffects(Rcpp::List rFE, bool echo); +// RcppExport SEXP OncoSimulR_readFitnessEffects(SEXP rFESEXP, SEXP echoSEXP) { +// BEGIN_RCPP +// Rcpp::RNGScope __rngScope; +// Rcpp::traits::input_parameter< Rcpp::List >::type rFE(rFESEXP); +// Rcpp::traits::input_parameter< bool >::type echo(echoSEXP); +// readFitnessEffects(rFE, echo); +// return R_NilValue; +// END_RCPP +// } diff --git a/OncoSimulR/src-i386/accessible_genotypes.cpp b/OncoSimulR/src-i386/accessible_genotypes.cpp new file mode 100644 index 00000000..81aeadbf --- /dev/null +++ b/OncoSimulR/src-i386/accessible_genotypes.cpp @@ -0,0 +1,458 @@ +// Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +#include +// using namespace Rcpp; + +inline int HammingDistance(const Rcpp::IntegerVector& x, const Rcpp::IntegerVector& y) { + Rcpp::NumericVector diff = Rcpp::abs( x - y ); + return std::accumulate(diff.begin(), diff.end(), 0); +} + + +// eventually remove this. Left now for testing +// [[Rcpp::export]] +Rcpp::IntegerVector accessibleGenotypes_former(Rcpp::IntegerMatrix y, + Rcpp::NumericVector f, + Rcpp::IntegerVector numMut, // + double th) { + // Return just the indices. Could preserve fitness, but would need + // another matrix. + int ng = y.nrow(); //it counts the wt + Rcpp::IntegerMatrix adm(ng, ng); + int numMutdiff = 0; + // I would have thought this would be faster. It ain't. + // The last genotype never accesses anything. + // for(int i = 0; i < (ng - 1); ++i) { + // // Candidate genotypes to be accessed from i are always of larger + // // mutation by 1. And candidates can thus not have smaller index + // for(int j = (i + 1); j < ng; ++j) { + // if( (numMut(j) == (numMut(i) + 1)) && + // ( (f(j) - f(i)) >= th) && + // (HammingDistance(y(i, _), y(j, _)) == 1) ) { + // adm(i, j) = 1; + // } else if( (numMut(j) > (numMut(i) + 1)) ) { + // break; + // } + // } + // } + + // The last genotype never accesses anything. + for(int i = 0; i < (ng - 1); ++i) { + // Candidate genotypes to be accessed from i are always of larger + // mutation by 1. And candidates can thus not have smaller index + for(int j = (i + 1); j < ng; ++j) { + numMutdiff = numMut(j) - numMut(i); + if( numMutdiff > 1) { // no more to search + break; + } else if(numMutdiff == 1) { + // f(j) - f(i) is faster than HammingDistance + // but might lead to more evals? + // or fewer, depending on landscape + if( ( (f(j) - f(i)) >= th) && + (HammingDistance(y(i, Rcpp::_), y(j, Rcpp::_)) == 1) + ) { + adm(i, j) = 1; + // Rcpp::Rcout << "i = " << i << " j = " << j << " adm " << adm(i,j) << "\n"; + } + } + } + } + + + + // Slightly different logic from R: Do not resize object; set the row to + // 0. + int colsum = 0; + // int indicator = 0; // indicator != 0 means we set one row to 0 + // so we need to iterate at least once more. + + // accessible is the genotype number, not the column! WT is 1, + // etc. This makes it easy to keep track of which are accessible. + Rcpp::IntegerVector accessible = Rcpp::seq_len(ng); + + // This is doable in one pass + // while (true) { + // indicator = 0; + for(int k = 1; k < ng; ++k) { + if(accessible(k) > 0) { + colsum = std::accumulate(adm(Rcpp::_, k).begin(), + adm(Rcpp::_, k).end(), 0); + if(colsum == 0) { // This genotype ain't reachable + // Nothing can be reached from this genotype; fill with 0. + adm(k, Rcpp::_) = Rcpp::IntegerVector(ng); + accessible(k) = -9; + // indicator = 1; + } + } + } + // if(indicator == 0) break; + // } + return accessible; +} + + + + + + +// [[Rcpp::export]] +Rcpp::NumericMatrix genot2AdjMat(Rcpp::IntegerMatrix y, + Rcpp::NumericVector f, + Rcpp::IntegerVector numMut) { + // Return just the indices. Could preserve fitness, but would need + // another matrix. + int ng = y.nrow(); //it counts the wt + Rcpp::NumericMatrix adm(ng, ng); + + // fill with NAs: https://stackoverflow.com/a/23753626 + // Filling with NAs and in general having NAs might lead to performance + // penalties. But I use the NAs in a lot of the code for accessible + // genotypes, etc. + std::fill( adm.begin(), adm.end(), Rcpp::NumericVector::get_na() ) ; + int numMutdiff = 0; + // I would have thought this would be faster. It ain't. + // The last genotype never accesses anything. + // for(int i = 0; i < (ng - 1); ++i) { + // // Candidate genotypes to be accessed from i are always of larger + // // mutation by 1. And candidates can thus not have smaller index + // for(int j = (i + 1); j < ng; ++j) { + // if( (numMut(j) == (numMut(i) + 1)) && + // ( (f(j) - f(i)) >= th) && + // (HammingDistance(y(i, _), y(j, _)) == 1) ) { + // adm(i, j) = 1; + // } else if( (numMut(j) > (numMut(i) + 1)) ) { + // break; + // } + // } + // } + + // The last genotype never accesses anything. + for(int i = 0; i < (ng - 1); ++i) { + // Candidate genotypes to be accessed from i are always of larger + // mutation by 1. And candidates can thus not have smaller index + for(int j = (i + 1); j < ng; ++j) { + numMutdiff = numMut(j) - numMut(i); + if( numMutdiff > 1) { // no more to search + break; + } else if(numMutdiff == 1) { + if( HammingDistance(y(i, Rcpp::_), y(j, Rcpp::_)) == 1) { + adm(i, j) = (f(j) - f(i)); + } + } + } + } + return adm; +} + + +Rcpp::IntegerMatrix integerAdjMat(Rcpp::IntegerMatrix y, + Rcpp::NumericVector f, + Rcpp::IntegerVector numMut, // + double th) { + // Return a genotype adjacency matrix with a 1 if genotype j is + // accessible (fitness >, within th) from i. + int ng = y.nrow(); //it counts the wt + Rcpp::IntegerMatrix adm(ng, ng); + int numMutdiff = 0; + // I would have thought this would be faster. It ain't. + // The last genotype never accesses anything. + // for(int i = 0; i < (ng - 1); ++i) { + // // Candidate genotypes to be accessed from i are always of larger + // // mutation by 1. And candidates can thus not have smaller index + // for(int j = (i + 1); j < ng; ++j) { + // if( (numMut(j) == (numMut(i) + 1)) && + // ( (f(j) - f(i)) >= th) && + // (HammingDistance(y(i, _), y(j, _)) == 1) ) { + // adm(i, j) = 1; + // } else if( (numMut(j) > (numMut(i) + 1)) ) { + // break; + // } + // } + // } + + // The last genotype never accesses anything. + for(int i = 0; i < (ng - 1); ++i) { + // Candidate genotypes to be accessed from i are always of larger + // mutation by 1. And candidates can thus not have smaller index + for(int j = (i + 1); j < ng; ++j) { + numMutdiff = numMut(j) - numMut(i); + if( numMutdiff > 1) { // no more to search + break; + } else if(numMutdiff == 1) { + // f(j) - f(i) is faster than HammingDistance + // but might lead to more evals? + // or fewer, depending on landscape + if( ( (f(j) - f(i)) >= th) && + (HammingDistance(y(i, Rcpp::_), y(j, Rcpp::_)) == 1) + ) { + adm(i, j) = 1; + // Rcpp::Rcout << "i = " << i << " j = " << j << " adm " << adm(i,j) << "\n"; + } + } + } + } + return adm; +} + + +// used in both peaks and accessible genotypes +Rcpp::IntegerVector accessibleGenotypesPeaksLandscape(Rcpp::IntegerMatrix y, + Rcpp::NumericVector f, + Rcpp::IntegerVector numMut, // + double th, + bool returnpeaks) { + // Return the indices. This is like accessibleGenotypes, but we do an + // extra loop + int ng = y.nrow(); //it counts the wt + Rcpp::IntegerMatrix adm(ng, ng); + + adm = integerAdjMat(y, f, numMut, th); + + int numMutdiff = 0; + + // Slightly different logic from R: Do not resize object; set the row to + // 0. + int colsum = 0; + // int indicator = 0; // indicator != 0 means we set one row to 0 + // so we need to iterate at least once more. + + // accessible is the genotype number, not the column! WT is 1, + // etc. This makes it easy to keep track of which are accessible. + Rcpp::IntegerVector accessible = Rcpp::seq_len(ng); + // This is doable in one pass + // while (true) { + // indicator = 0; + for(int k = 1; k < ng; ++k) { + if(accessible(k) > 0) { + colsum = std::accumulate(adm(Rcpp::_, k).begin(), + adm(Rcpp::_, k).end(), 0); + if(colsum == 0) { // This genotype ain't reachable + // Nothing can be reached from this genotype; fill with 0. + adm(k, Rcpp::_) = Rcpp::IntegerVector(ng); + accessible(k) = -9; + // indicator = 1; + } + } + } + // if(indicator == 0) break; + // } + if(!returnpeaks) { + return accessible; + } else { + // BEWARE: this will not work if several connected genotypes + // have the same fitness and are maxima + int rowsum = 0; + Rcpp::IntegerVector peaks; + for(int k = 0; k < ng; ++k) { + if(accessible(k) > 0) { + rowsum = std::accumulate(adm(k, Rcpp::_).begin(), + adm(k, Rcpp::_).end(), 0); + if(rowsum == 0) { // This genotype doesn't have children + peaks.push_back(k + 1); // k is index. But in R, WT is in pos 1 + } + } + } + return peaks; + } +} + + + +// [[Rcpp::export]] +Rcpp::IntegerVector accessibleGenotypes(Rcpp::IntegerMatrix y, + Rcpp::NumericVector f, + Rcpp::IntegerVector numMut, // + double th) { + return accessibleGenotypesPeaksLandscape(y, f, numMut, th, false); +} + +// [[Rcpp::export]] +Rcpp::IntegerVector peaksLandscape(Rcpp::IntegerMatrix y, + Rcpp::NumericVector f, + Rcpp::IntegerVector numMut, // + double th) { + return accessibleGenotypesPeaksLandscape(y, f, numMut, th, true); +} + + + +// // This would make it easier returning the actual accessible genotypes easily +// // preserving the fitness if needed +// // Not being used now +// // [[Rcpp::export]] +// IntegerVector acc_ge(Rcpp::IntegerMatrix y, Rcpp::NumericVector f, +// Rcpp::IntegerVector numMut, +// int ng, //it counts the wt +// double th) { + +// IntegerMatrix adm(ng, ng); +// int numMutdiff = 0; + +// for(int i = 0; i < (ng - 1); ++i) { +// // Candidates are always of larger mutation by 1 +// for(int j = (i + 1); j < ng; ++j) { +// numMutdiff = numMut(j) - numMut(i); +// if(numMutdiff > 1) { // no more to search +// break; +// } else if(numMutdiff == 1) { +// if( ( (f(j) - f(i)) >= th) && +// (HammingDistance(y(i, _), y(j, _)) == 1) ) { +// adm(i, j) = 1; +// } +// } +// } +// } +// // Keeps root in Rows +// IntegerMatrix admtmp = adm(Range(0, ng - 1), Range(1, ng - 1)); + +// // Slightly different logic from R: Do not resize object; set the row to +// // 0. +// int colsum = 0; +// int indicator = 0; // indicator != 0 means we set one row to 0 +// // so we need to iterate at least once more. + +// // accessible is the genotype number, not the column! WT is 1, +// // etc. This makes it easy to keep track of which are accessible. +// IntegerVector accessible = seq_len(ng - 1) + 1; + +// while (true) { +// indicator = 0; +// for(int k = 0; k < (ng - 1); ++k) { +// if(accessible(k) > 0) { +// colsum = std::accumulate(admtmp(_, k).begin(), +// admtmp(_, k).end(), 0); +// if(colsum == 0) { // This genotype ain't reachable +// // Recall row keeps Root. +// // Nothing can be reached from this genotype; fill with 0. +// admtmp(k + 1, _) = IntegerVector(ng - 1); +// accessible(k) = -9; +// indicator = 1; +// } +// } +// } +// if(indicator == 0) break; +// } +// return accessible; +// } + + + + + +// // [[Rcpp::export]] +// Rcpp::IntegerVector accessibleGenotypes(Rcpp::IntegerMatrix y, +// Rcpp::NumericVector f, +// Rcpp::IntegerVector numMut, // +// double th) { + +// // Return just the indices. Could preserve fitness, but would need +// // another matrix. +// int ng = y.nrow(); //it counts the wt +// Rcpp::IntegerMatrix adm(ng, ng); + +// adm = integerAdjMat(y, f, numMut, th); + +// int numMutdiff = 0; + +// // Slightly different logic from R: Do not resize object; set the row to +// // 0. +// int colsum = 0; +// // int indicator = 0; // indicator != 0 means we set one row to 0 +// // so we need to iterate at least once more. + +// // accessible is the genotype number, not the column! WT is 1, +// // etc. This makes it easy to keep track of which are accessible. +// Rcpp::IntegerVector accessible = Rcpp::seq_len(ng); + +// // This is doable in one pass +// // while (true) { +// // indicator = 0; +// for(int k = 1; k < ng; ++k) { +// if(accessible(k) > 0) { +// colsum = std::accumulate(adm(Rcpp::_, k).begin(), +// adm(Rcpp::_, k).end(), 0); +// if(colsum == 0) { // This genotype ain't reachable +// // Nothing can be reached from this genotype; fill with 0. +// adm(k, Rcpp::_) = Rcpp::IntegerVector(ng); +// accessible(k) = -9; +// // indicator = 1; +// } +// } +// } +// // if(indicator == 0) break; +// // } +// return accessible; +// } + + + + +// // [[Rcpp::export]] +// Rcpp::IntegerVector peaksLandscape(Rcpp::IntegerMatrix y, +// Rcpp::NumericVector f, +// Rcpp::IntegerVector numMut, // +// double th) { +// // Return the indices. This is like accessibleGenotypes, but we do an +// // extra loop +// int ng = y.nrow(); //it counts the wt +// Rcpp::IntegerMatrix adm(ng, ng); + +// adm = integerAdjMat(y, f, numMut, th); + +// int numMutdiff = 0; + +// // Slightly different logic from R: Do not resize object; set the row to +// // 0. +// int colsum = 0; +// // int indicator = 0; // indicator != 0 means we set one row to 0 +// // so we need to iterate at least once more. + +// // accessible is the genotype number, not the column! WT is 1, +// // etc. This makes it easy to keep track of which are accessible. +// Rcpp::IntegerVector accessible = Rcpp::seq_len(ng); +// // This is doable in one pass +// // while (true) { +// // indicator = 0; +// for(int k = 1; k < ng; ++k) { +// if(accessible(k) > 0) { +// colsum = std::accumulate(adm(Rcpp::_, k).begin(), +// adm(Rcpp::_, k).end(), 0); +// if(colsum == 0) { // This genotype ain't reachable +// // Nothing can be reached from this genotype; fill with 0. +// adm(k, Rcpp::_) = Rcpp::IntegerVector(ng); +// accessible(k) = -9; +// // indicator = 1; +// } +// } +// } +// // if(indicator == 0) break; +// // } + +// int rowsum = 0; +// Rcpp::IntegerVector peaks; +// for(int k = 1; k < ng; ++k) { +// if(accessible(k) > 0) { +// rowsum = std::accumulate(adm(k, Rcpp::_).begin(), +// adm(k, Rcpp::_).end(), 0); +// if(rowsum == 0) { // This genotype doesn't have children +// peaks.push_back(k); +// } +// } +// } + + +// return peaks; +// } diff --git a/OncoSimulR/src-i386/bnb_common.cpp b/OncoSimulR/src-i386/bnb_common.cpp new file mode 100644 index 00000000..730f00aa --- /dev/null +++ b/OncoSimulR/src-i386/bnb_common.cpp @@ -0,0 +1,1161 @@ +// Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + + +#include +#include "bnb_common.h" +#include "new_restrict.h" // for the TypeModel enum +#include + + +void print_spP(const spParamsP& spP) { + Rcpp::Rcout <<"\n this is spP\n" + <<"\n popSize = " << spP.popSize + <<"\n birth = " << spP.birth + <<"\n death = " << spP.death + <<"\n W = " << spP.W + <<"\n R = " << spP.R + <<"\n mutation = " << spP.mutation + <<"\n timeLastUpdate = " << spP.timeLastUpdate + <<"\n absfitness = " << spP.absfitness + <<"\n numMutablePos =" << spP.numMutablePos + <<"\n"; +} + +double pM_f_st(const double& t, + const spParamsP& spP){ + // For interpretation, recall, from suppl. mat. of their paper, p.2 that + // p M (t)^n0 = G(0, t) is the probability that a mutation has not yet occurred. + + long double Ct = cosh(spP.R * t/2.0); + long double St = sinh(spP.R * t/2.0); + long double lpM = -99.99; + + if( (!std::isfinite(Ct) ) || (!std::isfinite(St)) ) { + throw std::range_error("pM.f: Ct or St too big"); + } + + // my expression, which I think is better + // lpM = (R * Ct + St * (2.0 * death - W ))/(R * Ct + St * (W - 2.0 * growth)); + // theirs, in paper and code + lpM = (spP.R * Ct + 2.0 * spP.death * St - spP.W * St)/ + (spP.R * Ct - 2.0 * spP.birth * St + spP.W * St); + + double pM = static_cast(lpM); + + if( !std::isfinite(pM) ) { + print_spP(spP); + throw std::range_error("pM.f: pM not finite"); + } + if(pM <= 0.0) { + print_spP(spP); + throw std::range_error("pM.f: pM <= 0.0"); + } + return pM; +} + +double ti_nextTime_tmax_2_st(const spParamsP& spP, + const double& currentTime, + const double& tSample, + int& ti_dbl_min, + int& ti_e3) { + // Following the logic of the code by Mather in + // findNextMutationTime + + // We return the nextMutationTime or a value larger than the + // max length of the period (tSample) + + // I also change names rr, r, to match those in Mather r1, r. + + // However, I pass mutation, and split computation to avoid numerical problems + // I was getting ti == 0 and ti < 0 in the other versions with large N. + using namespace Rcpp ; + + double r1; + double ti; + double pM; + + // FIXME: should never happen + if(spP.popSize <= 0.0) { + +#ifdef _WIN32 + throw std::range_error("ti: popSize <= 0. spP.popSize = " + + SSTR(spP.popSize)); +#endif + +#ifndef _WIN32 + throw std::range_error("ti: popSize <= 0. spP.popSize = " + + std::to_string(spP.popSize)); +#endif + } + // long double invpop = 1/spP.popSize; + // long double r; + + double invpop = 1/spP.popSize; + double r; + + + const double epsilon = 10.0; + + // W < 0 is a signal that mutation is zero, and thus ti is Inf + if(spP.mutation == 0) { // spP.W <= -90.0) { + ti = tSample + 2.0 * epsilon; + // yes, this is silly but to differentiate from + // r < pM without further info + // and to lead to finite value in loop for min. + //ti = std::numeric_limits::infinity(); + } else { + + RNGScope scope; + r1 = ::Rf_runif(0.0, 1.0); + // this was in the original Mather code, but I doubt + // it really makes it more stable, and seems more expensive + // r = exp((1.0 / n) * log(r1)); + // r = pow(r1, 1.0/spP.popSize); //what I do + // r = exp( invpop * log(static_cast(r1)) ); + //r = pow(static_cast(r1), invpop); // what I do + r = pow(r1, invpop); // what I do + pM = pM_f_st(tSample - currentTime, spP); + + if( r < pM) {// time to mutation longer that this time period + ti = tSample + epsilon; + } else { + // Expand numerator and denominatior, the term for W and simplify. + // Then, express as (1- r) (which is, inclussively, between 0 and 1) + // and then multiply by -1 to take the log of each + + // long double tmp2 = 2.0L * spP.mutation; + // long double tmp = (spP.birth - spP.death) - spP.mutation; + // long double oneminusr = 1.0L - r; + // long double numerator = oneminusr * (tmp + spP.R) + tmp2; + // long double denominator = oneminusr * (tmp - spP.R ) + tmp2; + + double tmp2 = 2.0L * spP.mutation; + double tmp = (spP.birth - spP.death) - spP.mutation; + double oneminusr = 1.0L - r; + double numerator = oneminusr * (tmp + spP.R) + tmp2; + double denominator = oneminusr * (tmp - spP.R ) + tmp2; + + // numerator = (1.0 - r) * (spP.R + spP.birth - spP.death - spP.mutation) + // + 2.0 * spP.mutation; + // denominator = (1.0 - r) * (spP.birth - spP.death - spP.mutation - spP.R ) + // + 2.0 * spP.mutation; + + // FIXME? is it really necessary to use log(-a) - log(-b) or could + // I just use log(a/b), where a and b are -numerator and -denominator? + // use the log of ratio, in case negative signs in numerator or denom. + // long double invspr = 1.0L/spP.R; + // ti = static_cast(invspr * (log(numerator) - log(denominator))); + double invspr = 1.0L/spP.R; + ti = invspr * log(numerator/denominator); + + + //ti = (1.0/spP.R) * (log(numerator) - log(denominator)); + + //eq. 11 + // ti = (1.0/R) * (log( -1 * (r * (R - W + 2.0 * growth) - W - R + 2.0 * death )) - + // log( -1 * (r * (-R -W + 2.0 * growth) - W + R + 2.0 * death ))); + + // ti = (1.0/R) * log( (r * (R - W + 2.0 * growth) - W - R + 2.0 * death) / + // (r * (-R -W + 2.0 * growth) - W + R + 2.0 * death)); + // Rcpp::Rcout << "\n this is ti = " << ti << "\n"; + if(ti < 0.0) { + double eq12 = pow( (spP.R - spP.W + 2.0 * spP.death) / + (spP.R + spP.W - 2.0 * spP.birth) , spP.popSize); + + Rcpp::Rcout << "\n ERROR: ti: eq.11 < 0 \n"; + // Rcpp::Rcout << "\n R = " << R; + // Rcpp::Rcout << "\n W = " << W; + // Rcpp::Rcout << "\n r1 = " << r1; + // Rcpp::Rcout << "\n r = " << r; + // Rcpp::Rcout << "\n n = " << n; + // Rcpp::Rcout << "\n mu = " << mu; + // Rcpp::Rcout << "\n growth = " << growth; + // Rcpp::Rcout << "\n death = " << death << "\n"; + Rcpp::Rcout << "\n numerator = " << numerator; + Rcpp::Rcout << "\n denominator = " << denominator; + Rcpp::Rcout << "\n is r > 1? " << (r > 1.0) << "\n"; + Rcpp::Rcout << "\n is r < 0? " << (r < 0.0) << "\n"; + Rcpp::Rcout << "\n is eq12 < r? " << (eq12 < r) << "\n"; + throw std::range_error("ti: eq.11 < 0"); + } + if( !std::isfinite(ti) ) { + double eq12 = pow( (spP.R - spP.W + 2.0 * spP.death) / + (spP.R + spP.W - 2.0 * spP.birth) , spP.popSize); + double numerator2 = r * (spP.R - spP.W + 2.0 * spP.birth) - + spP.W - spP.R + 2.0 * spP.death; + double denominator2 = r * (-spP.R - spP.W + 2.0 * spP.birth) - + spP.W + spP.R + 2.0 * spP.death; + double ti2 = invspr * log(numerator2/denominator2); + + if(std::abs(ti - ti2) > 1e-5) { + DP2(ti); + DP2(ti2); + DP2(numerator); + DP2(numerator2); + DP2(denominator); + DP2(denominator2); + DP2(invspr); + DP2(r); + DP2(r1); + DP2(tmp); + DP2(tmp2); + DP2(oneminusr); + //print_spP(spP); + } + // Rcpp::Rcout << "\n ERROR: ti not finite \n"; + // Rcpp::Rcout << "\n R = " << R; + // Rcpp::Rcout << "\n W = " << W; + Rcpp::Rcout << "\n r1 = " << r1; + Rcpp::Rcout << "\n r = " << r; + // Rcpp::Rcout << "\n n = " << n; + // Rcpp::Rcout << "\n growth = " << growth; + // Rcpp::Rcout << "\n death = " << death << "\n"; + Rcpp::Rcout << "\n numerator = " << numerator; + Rcpp::Rcout << "\n denominator = " << denominator; + Rcpp::Rcout << "\n ti2 = " << ti2; + Rcpp::Rcout << "\n numerator2 = " << numerator2; + Rcpp::Rcout << "\n denominator2 = " << denominator2; + + Rcpp::Rcout << "\n is r > 1? " << (r > 1.0) << "\n"; + Rcpp::Rcout << "\n is r < 0? " << (r < 0.0) << "\n"; + Rcpp::Rcout << "\n is eq12 < r? " << (eq12 < r) << "\n"; + Rcpp::Rcout << "\n tmp = " << tmp << "\n"; + Rcpp::Rcout << "\n tmp2 = " << tmp2 << "\n"; + Rcpp::Rcout << "\n eq12 = " << eq12 << "\n"; + print_spP(spP); + throw std::range_error("ti: ti not finite"); + } + if((ti == 0.0) || (ti <= DBL_MIN)) { +// #ifdef DEBUGW +// // this is too verbose for routine use +// std::string ti_dbl_comp; +// if( ti == DBL_MIN) { +// ti_dbl_comp = "ti_equal_DBL_MIN"; +// DP2(ti); +// } else if (ti == 0.0) { +// ti_dbl_comp = "ti_equal_0.0"; +// } else if ( (ti < DBL_MIN) && (ti > 0.0) ) { +// ti_dbl_comp = "ti_gt_0.0_lt_DBL_MIN"; +// DP2(ti); +// } else { +// ti_dbl_comp = "IMPOSSIBLE!"; +// } +// DP2(ti_dbl_comp); +// #endif +#ifdef DEBUGV + // FIXME: pass verbosity as argument, and return the warning + // if set to more than 0? + Rcpp::Rcout << "\n\n\n WARNING: ti == 0. Setting it to DBL_MIN \n"; + + double eq12 = pow( (spP.R - spP.W + 2.0 * spP.death) / + (spP.R + spP.W - 2.0 * spP.birth) , spP.popSize); + + Rcpp::Rcout << "\n tmp2 = " << tmp2; + Rcpp::Rcout << "\n tmp = " << tmp; + Rcpp::Rcout << "\n invspr = " << invspr; + Rcpp::Rcout << "\n invpop = " << invpop; + // Rcpp::Rcout << "\n R = " << R; + // Rcpp::Rcout << "\n W = " << W; + // Rcpp::Rcout << "\n r1 = " << r1; + // Rcpp::Rcout << "\n r = " << r; + // Rcpp::Rcout << "\n n = " << n; + // Rcpp::Rcout << "\n growth = " << growth; + // Rcpp::Rcout << "\n death = " << death << "\n"; + Rcpp::Rcout << "\n numerator = " << numerator; + Rcpp::Rcout << "\n denominator = " << denominator; + Rcpp::Rcout << "\n numerator == denominator? " << + (numerator == denominator); + Rcpp::Rcout << "\n is r > 1? " << (r > 1.0); + Rcpp::Rcout << "\n is r < 0? " << (r < 0.0); + Rcpp::Rcout << "\n r = " << r; + Rcpp::Rcout << "\n is r == 1? " << (r == 1.0L); + Rcpp::Rcout << "\n oneminusr = " << oneminusr; + Rcpp::Rcout << "\n is oneminusr == 0? " << (oneminusr == 0.0L); + Rcpp::Rcout << "\n r1 = " << r1; + Rcpp::Rcout << "\n is r1 == 1? " << (r1 == 1.0); + Rcpp::Rcout << "\n is eq12 < r? " << (eq12 < r); +#endif + ++ti_dbl_min; + ti = DBL_MIN; + // Beware of this!! throw std::range_error("ti set to DBL_MIN"); + // Do not exit. Record it. We check for it now in R code. Maybe + // abort simulation and go to a new one? + // Rcpp::Rcout << "ti set to DBL_MIN\n"; + // Yes, abort because o.w. we can repeat it many, manu times + // throw std::range_error("ti set to DBL_MIN"); + // Even just touching DBL_MIN is enough to want a rerunExcept; + // no need for it to be 0.0. + + // Trying to understand what happens + Rcpp::Rcout << " ti set to DBL_MIN: spP.popSize = " << spP.popSize << "\n"; + // It seems poSize over 1e8, and even 3.5 e7 can trigger this exception (depending + // on mutation rate, of course) + throw rerunExcept("ti set to DBL_MIN"); + } + if(ti < (2*DBL_MIN)) ++ti_e3; // Counting how often this happens. + // Can be smaller than the ti_dbl_min count + ti += currentTime; + // But we can still have issues here if the difference is too small + if( (ti <= currentTime) ) { + // Rcpp::Rcout << "\n (ti <= currentTime): expect problems\n"; + throw rerunExcept("ti <= currentTime"); + } + } + } + return ti; +} + +double Algo2_st(const spParamsP& spP, + const double& ti, + const int& mutationPropGrowth) { // need mutPropGrowth to + // know if we should + // throw + + // beware the use of t: now as it used to be, as we pass the value + // and take the diff in here: t is the difference + + using namespace Rcpp ; + double t = ti - spP.timeLastUpdate; + + // if (t == 0 ) { + // Rcpp::Rcout << "\n Entered Algo2 with t = 0\n" << + // " Is this a forced sampling case?\n"; + // return num; + // } + + if (spP.popSize == 0.0) { + #ifdef DEBUGW + Rcpp::Rcout << "\n Entered Algo2 with pop size = 0\n"; + #endif + return 0.0; + } + + + if( (spP.mutation == 0.0) && + !(spP.birth <= 0 && mutationPropGrowth) ) { + Rcpp::Rcout << "\n Entered Algo2 with mutation rate = 0\n"; + if( spP.numMutablePos != 0 ) + throw std::range_error("mutation = 0 with numMutable != 0?"); + } + + + // double pm, pe, pb; + double m; // the holder for the binomial + + double pm = pM_f_st(t, spP); + double pe = pE_f_st(pm, spP); + double pb = pB_f_st(pe, spP); + + // if(spP.numMutablePos == 0) { + // // Just do the math. In this case mutation rate is 0. Thus, pM (eq. 8 + // // in paper) is 1 necessarily. And pE is birth/death rates (if you do + // // things sensibly, not multiplying numbers as in computing pE + // // below). And pB is also 1. + + // // This will blow up below if death > birth, as then pe/pm > 1 and 1 - + // // pe/pm < 0. But I think this would just mean this is extinct? + // pm = 1; + // pe = spP.death/spP.birth; + // pb = 1; + // } else { + // pm = pM_f_st(t, spP); + // pe = pE_f_st(pm, spP); + // pb = pB_f_st(pe, spP); + // } + + double rnb; // holder for neg. bino. So we can check. + double retval; //So we can check + + if( (1.0 - pe/pm) > 1.0) { + Rcpp::Rcout << "\n ERROR: Algo 2: (1.0 - pe/pm) > 1.0\n"; + // << " t = " << t << "; R = " << R + // << "; W = " << W << ";\n death = " << death + // << "; growth = " << growth << ";\n pm = " << pm + // << "; pe = " << pe << "; pb = " << pb << std::endl; + throw std::range_error("Algo 2: 1 - pe/pm > 1"); + } + + if( (1.0 - pe/pm) < 0.0 ) { + Rcpp::Rcout << "\n ERROR: Algo 2, (1.0 - pe/pm) < 0.0 \n" + << " t = " << t << "; R = " << spP.R + << "; W = " << spP.W << ";\n death = " << spP.death + << "; growth = " << spP.birth << ";\n pm = " << pm + << "; pe = " << pe << "; pb = " << pb << std::endl; + throw std::range_error("Algo 2: 1 - pe/pm < 0"); + } + + if( pb > 1.0 ) { + // Rcpp::Rcout << "\n WARNING: Algo 2, pb > 1.0 \n"; + // << " t = " << t << "; R = " << R + // << "; W = " << W << ";\n death = " << death + // << "; growth = " << growth << ";\n pm = " << pm + // << "; pe = " << pe << "; pb = " << pb << std::endl; + throw std::range_error("Algo 2: pb > 1 "); + } + + if( pb < 0.0 ) { + // Rcpp::Rcout << "\n WARNING: Algo 2, pb < 0.0 \n"; + // << " t = " << t << "; R = " << R + // << "; W = " << W << ";\n death = " << death + // << "; growth = " << growth << ";\n pm = " << pm + // << "; pe = " << pe << "; pb = " << pb << std::endl; + throw std::range_error("Algo 2: pb < 0"); + } + //} + + + if( pe == pm ) { + // Should never happen. Exact identity?? + Rcpp::Rcout << "\n WARNING: Algo 2: pe == pm \n" ; + // << "; pm = " << pm << "; pe = " + // << pe << " pe == 0? " << (pe == 0) << "\n"; + // t << "; R = " << R + // << "; W = " << W << "; death = " << death + // << "; growth = " << growth << "; pm = " << pm + // << "; pe = " << pe << std::endl; + return 0.0; + } + + + RNGScope scope; + m = ::Rf_rbinom(spP.popSize, 1.0 - (pe/pm)); + // this is dangerous. I'd rather throw an exception and bail out soon + // if(std::isnan(m)) { + // // we can get issues with rbinom and odd numbers > 1e15 + // // see "example-binom-problems.cpp" + // // hack this, and issue a warning + // Rcpp::Rcout << "\n\nWARNING: Using hack around rbinom NaN problem in Algo2\n"; + // m = ::Rf_rbinom(spP.popSize + 1, 1.0 - (pe/pm)); + // } + if(m <= 0.5) { // they are integers, so 0 or 1. + #ifdef DEBUGW // just checking + if(m != 0.0) + Rcpp::Rcout << "\n WARNING: Algo 2: 0.0 < m < 0.5" < 1.0) { + // Rcpp::Rcout << "\n ERROR: Algo 3: (1.0 - pe/pm) > 1.0\n"; + // << " t = " << t << "; R = " << R + // << "; W = " << W << ";\n death = " << death + // << "; growth = " << growth << ";\n pm = " << pm + // << "; pe = " << pe << "; pb = " << pb << std::endl; + throw std::range_error("Algo 3: 1 - pe/pm > 1"); + } + + if( (1.0 - pe/pm) < 0.0 ) { + // Rcpp::Rcout << "\n ERROR: Algo 3, (1.0 - pe/pm) < 0.0\n "; + // << " t = " << t << "; R = " << R + // << "; W = " << W << ";\n death = " << death + // << "; growth = " << growth << ";\n pm = " << pm + // << "; pe = " << pe << "; pb = " << pb << std::endl; + throw std::range_error("Algo 3: 1 - pe/pm < 0"); + } + + if( pb > 1.0 ) { + // Rcpp::Rcout << "\n WARNING: Algo 3, pb > 1.0\n "; + // << " t = " << t << "; R = " << R + // << "; W = " << W << ";\n death = " << death + // << "; growth = " << growth << ";\n pm = " << pm + // << "; pe = " << pe << "; pb = " << pb << std::endl; + throw std::range_error("Algo 3: pb > 1 "); + } + + if( pb < 0.0 ) { + // Rcpp::Rcout << "\n WARNING: Algo 3, pb < 0.0\n "; + // << " t = " << t << "; R = " << R + // << "; W = " << W << ";\n death = " << death + // << "; growth = " << growth << ";\n pm = " << pm + // << "; pe = " << pe << "; pb = " << pb << std::endl; + throw std::range_error("Algo 3: pb < 0"); + } + + if( pe == pm ) { + // Should never happen. Exact identity?? + Rcpp::Rcout << "\n WARNING: Algo 3: pm == pe\n"; + // << "; t = " << + // t << "; R = " << R + // << "; W = " << W << "; death = " << death + // << "; growth = " << growth << "; pm = " << pm + // << "; pb = " << pb << std::endl; + + return 0.0; + } + + RNGScope scope; + m = ::Rf_rbinom(spP.popSize - 1.0, 1.0 - (pe/pm)); + // dangerous + // if(std::isnan(m)) { + // // we can get issues with rbinom and odd numbers > 1e15 + // // see "example-binom-problems.cpp" + // // hack this, and issue a warning + // Rcpp::Rcout << "\n\nWARNING: Using hack around rbinom NaN problem in Algo3\n"; + // m = ::Rf_rbinom(spP.popSize, 1.0 - (pe/pm)); + // } + rnb = ::Rf_rnbinom(m + 2.0, 1.0 - pb); + + // if(std::isnan(rnb)) { + // Rcpp::Rcout << "\n\nWARNING: Using hack around rnbinom NaN problem in Algo3\n"; + // rnb = ::Rf_rnbinom(m + 1.0, 1.0 - pb); + // } + retval = m + 1 + rnb; + + if( !std::isfinite(retval) ) { + DP2(rnb); DP2(m); DP2(pe); DP2(pm); + print_spP(spP); + // Rcpp::Rcout << "\n ERROR: Algo 3, retval not finite\n "; + // << " t = " << t << "; R = " << R + // << "; W = " << W << ";\n death = " << death + // << "; growth = " << growth << ";\n pm = " << pm + // << "; pe = " << pe << "; pb = " << pb + // << "; m = " << m << " ; rnb = " << rnb << std::endl; + throw std::range_error("Algo 3: retval not finite"); + } + if( !std::isfinite(retval) ) { + DP2(rnb); DP2(m); DP2(pe); DP2(pm); + print_spP(spP); + throw std::range_error("Algo 3: retval is NaN"); + } + return retval; +} + + + + +void precissionLoss(){ + // We are storing population sizes as doubles. + // Should not loose any precission up to 2^53 - 1 + // (e.g., http://stackoverflow.com/a/1848762) + // but double check if optims break it. + // Note that the original code by Mather stores it as int. + + // Problems are likely to arise sooner, with 4.5e15, because + // of rbinom. See notes in example-binom-problems.cpp + // We warn about that if totPopSize > 4e15 + double a, b, c, d; + int e, f; + a = pow(2, 52) + 1.0; + b = pow(2, 52); // 2^53 a little over 9*1e15 + c = (9.0 * 1e15) + 1.0; + d = (9.0 * 1e15); + + e = static_cast(a - b); + f = static_cast(c - d); + + if( a == b) Rcpp::Rcout << "WARNING!!!! \n Precission loss: a == b\n"; + if( !(a > b)) Rcpp::Rcout << "WARNING!!!! \n Precission loss: !(a > b)\n"; + if(c == d) Rcpp::Rcout << "WARNING!!!! \n Precission loss: c == d\n"; + if( !(c > d)) Rcpp::Rcout << "WARNING!!!! \n Precission loss: !(c > d)\n"; + if( e != 1 ) Rcpp::Rcout << "WARNING!!!! \n Precission loss: e != 1\n"; + if( f != 1 ) Rcpp::Rcout << "WARNING!!!! \n Precission loss: f != 1\n"; +} + +void init_tmpP(spParamsP& tmpParam) { + tmpParam.popSize = -std::numeric_limits::infinity(); + tmpParam.birth = -std::numeric_limits::infinity(); + tmpParam.death = -std::numeric_limits::infinity(); + tmpParam.W = -std::numeric_limits::infinity(); + tmpParam.R = -std::numeric_limits::infinity(); + tmpParam.mutation = -std::numeric_limits::infinity(); + tmpParam.timeLastUpdate = std::numeric_limits::infinity(); + tmpParam.absfitness = -std::numeric_limits::infinity(); + tmpParam.numMutablePos = -999999; +} + + + +// this is the log of the ratio of death rates +// so the the difference of the successie death rates, if using +// the log version. +// double returnMFE(double& e1, +// const double& K, +// const std::string& typeFitness) { +// if((typeFitness == "mcfarland0") || (typeFitness == "mcfarlandlog")) +// return log(e1); +// else if(typeFitness == "mcfarland") +// return ((1.0/K) * e1); +// else +// return -99; +// } + + +// double returnMFE(double& e1, +// const double& K, +// const TypeModel typeModel) { +// if((typeModel == TypeModel::mcfarland0) || (typeModel == TypeModel::mcfarlandlog)) +// return log(e1); +// else if(typeModel == TypeModel::mcfarland) +// return ((1.0/K) * e1); +// else +// return -99; +// } + +// double returnMFE(double& e1, +// // const double& K, +// const std::string& typeFitness) { +// if(typeFitness == "mcfarlandlog") +// return log(e1); +// else +// return -99; +// } + +// double returnMFE(double& e1, +// // const double& K, +// const TypeModel typeModel) { +// if(typeModel == TypeModel::mcfarlandlog) +// return log(e1); +// else +// return -99; +// } + +// Get a -99 where there should be no error because of model +double returnMFE_new(double& en1, + const std::string& typeFitness) { + if(typeFitness == "mcfarlandlog") + return en1; + else + return -99; +} + +double returnMFE_new(double& en1, + const TypeModel typeModel) { + if(typeModel == TypeModel::mcfarlandlog) + return en1; + else + return -99; +} + + + + + +// FIXME But I'd probably want a percent error, compared to the death rate +// something like (log(1+N1/K) - log(1+N2/K))/(log(1+N1/K)) + + +// void computeMcFarlandError(double& e1, +// double& n_0, +// double& n_1, +// double& tps_0, +// double& tps_1, +// const std::string& typeFitness, +// const double& totPopSize, +// const double& K){ +// // const double& initSize) { +// // static double tps_0 = initSize; +// // static double tps_1 = 0.0; + +// if( (typeFitness == "mcfarland0") || +// (typeFitness == "mcfarland") || +// (typeFitness == "mcfarlandlog") ) { + +// double etmp; +// tps_1 = totPopSize; +// if(typeFitness == "mcfarland") +// etmp = std::abs( tps_1 - (tps_0 + 1) ); +// else { +// if( (tps_0 + 1.0) > tps_1 ) +// etmp = (K + tps_0 + 1.0)/(K + tps_1); +// else +// etmp = (K + tps_1)/(K + tps_0 + 1); +// } +// if(etmp > e1) { +// e1 = etmp; +// n_0 = tps_0; +// n_1 = tps_1; +// } +// tps_0 = tps_1; +// } +// } + +// Former twisted function, which contains an (irrelevant for practical +// purposes) error when we go from No = N1 - 1. + +// void computeMcFarlandError(double& e1, +// double& n_0, +// double& n_1, +// double& tps_0, +// double& tps_1, +// const std::string& typeFitness, +// const double& totPopSize, +// const double& K){ + +// if(typeFitness == "mcfarlandlog") { +// double etmp; +// tps_1 = totPopSize; +// if( (tps_0 + 1.0) > tps_1 ) +// etmp = (K + tps_0 + 1.0)/(K + tps_1); +// else +// etmp = (K + tps_1)/(K + tps_0 + 1); +// if(etmp > e1) { +// e1 = etmp; +// n_0 = tps_0; +// n_1 = tps_1; +// } +// tps_0 = tps_1; +// } +// } + +// void computeMcFarlandError(double& e1, +// double& n_0, // for the hell of keeping it +// double& tps_0, +// const std::string& typeFitness, +// const double& totPopSize, +// const double& K){ + +// if(typeFitness == "mcfarlandlog") { +// double etmp; +// double tps_1 = totPopSize; +// if( tps_1 > tps_0 ) { +// etmp = (K + tps_1)/(K + tps_0); +// } else if ( tps_0 > tps_1 ) { +// etmp = (K + tps_0)/(K + tps_1); +// } else { // no change or change by 1 means no error +// etmp = 1; +// } +// if(etmp > e1) { +// e1 = etmp; +// n_0 = tps_0; // just for the hell of keeping it +// } +// tps_0 = tps_1; +// } +// } + +// // The logic +// // Death is log( 1 + N/K) so log( (K + N)/K ) + +// // We go from size at A (tps_0) to size at C (tps_1) + +// // These expressions compute the absolute value of the difference in death +// // rates between the actual death rate (DC) and the death rate that would +// // have taken place if change had been by one birth or death (DB): + +// // Suppose DC > DA: +// // DC - DA = log( (K + tps_1)/K ) - log( (K + tps_0 + 1)/N ) = +// // = log( (K + tps_1)/(K + tps_0 + 1) ) +// // To avoid logs, we store the ratio. + + + +// void computeMcFarlandError(double& e1, +// double& e1std, +// double& n_0, // for the hell of keeping it +// double& tps_0, +// const TypeModel typeModel, +// const double& totPopSize, +// const double& K){ + +// if( typeModel == TypeModel::mcfarlandlog ) { +// double etmp; +// double etmpstd; +// double tps_1 = totPopSize; +// if( tps_1 > tps_0 ) { +// etmp = (K + tps_1)/(K + tps_0); +// } else if ( tps_0 > tps_1 ) { +// etmp = (K + tps_0)/(K + tps_1); +// } else { // no change or change by less than 1 means no error +// etmp = 1; +// } +// if(etmp > e1) { +// e1 = etmp; +// n_0 = tps_0; // just for the hell of keeping it +// } +// tps_0 = tps_1; +// } +// } + + + +void computeMcFarlandError_new(double& em1, + double& em1sc, // scaled + double& totPopSize_previous, + double& DA_previous, + const TypeModel typeModel, + const double& totPopSize, + const double& K){ + // Simple logic: + // Really, simple thing: compute difference between successive death + // rates, and also scale. Period. + + if( typeModel == TypeModel::mcfarlandlog ) { + double etmp, etmpsc; + etmp = 0.0; + etmpsc = 0.0; + double DC = log1p(totPopSize/K); + if( std::abs(totPopSize - totPopSize_previous) < 1 ) { + etmp = 0.0; + } else { + etmp = std::abs(DC - DA_previous); + etmpsc = etmp/DA_previous; + } + if(etmp > em1) em1 = etmp; + if(etmpsc > em1sc) em1sc = etmpsc; + DA_previous = DC; + totPopSize_previous = totPopSize; + } +} + +void computeMcFarlandError_new(double& em1, + double& em1sc, // scaled + double& totPopSize_previous, + double& DA_previous, + const std::string& typeFitness, + const double& totPopSize, + const double& K){ + // Simple logic: + // Really, simple thing: compute difference between successive death + // rates, and also scale. Period. + if(typeFitness == "mcfarlandlog") { + double etmp, etmpsc; + etmp = 0.0; + etmpsc = 0.0; + double DC = log1p(totPopSize/K); + if( std::abs(totPopSize - totPopSize_previous) < 1 ) { + etmp = 0.0; + } else { + etmp = std::abs(DC - DA_previous); + etmpsc = etmp/DA_previous; + } + if(etmp > em1) em1 = etmp; + if(etmpsc > em1sc) em1sc = etmpsc; + DA_previous = DC; + totPopSize_previous = totPopSize; + } +} + + + + + +// void computeMcFarlandError_new(double& en1, +// double& totPopSize_previous, +// double& DA_previous, +// const TypeModel typeModel, +// const double& totPopSize, +// const double& K){ +// // Simple logic: + +// // If we updated whenever there was a birth or death we would have these +// // changes between time points A and B (where A comes before B): +// // DA = log(1 + totPopSize_previous/K) [= log1p(totPopSize_previous/K)] +// // Birth of 1: +// // DB = log1p((totPopSize_previous + 1)/K) +// // Death of 1: +// // DB = log1p((totPopSize_previous - 1)/K) + + +// // But we actually have C, not B with: +// // DC = log1p(totPopSize/K) + +// // So we compute: abs(DC - DB)/DA + +// // We can store DA. And yes, DA is generally almost identical to DB. + +// if( typeModel == TypeModel::mcfarlandlog ) { +// double etmp; +// double DC = log1p(totPopSize/K); + + +// if( std::abs(totPopSize - totPopSize_previous) < 1 ) { +// etmp = 0.0; +// } else { +// double DB; +// if ( totPopSize > totPopSize_previous ) { +// DB = log1p((totPopSize_previous + 1)/K); +// } else { // if ( totPopSize < totPopSize_previous ) { +// DB = log1p((totPopSize_previous - 1)/K); +// } +// etmp = std::abs(DC - DB)/DA_previous; +// } +// if(etmp > en1) en1 = etmp; + +// DA_previous = DC; +// totPopSize_previous = totPopSize; +// } +// } + + + + +// void computeMcFarlandError_new(double& en1, +// double& totPopSize_previous, +// double& DA_previous, +// const std::string& typeFitness, +// const double& totPopSize, +// const double& K){ +// // Same as above, but for the old, v.1, specification +// if(typeFitness == "mcfarlandlog") { + +// double etmp; +// double DC = log1p(totPopSize/K); + + +// if( std::abs(totPopSize - totPopSize_previous) < 1 ) { +// etmp = 0.0; +// } else { +// double DB; +// if ( totPopSize > totPopSize_previous ) { +// DB = log1p((totPopSize_previous + 1)/K); +// } else { // if ( totPopSize < totPopSize_previous ) { +// DB = log1p((totPopSize_previous - 1)/K); +// } +// etmp = std::abs(DC - DB)/DA_previous; +// } +// if(etmp > en1) en1 = etmp; + +// DA_previous = DC; +// totPopSize_previous = totPopSize; +// } +// } + + + + + +// void computeMcFarlandError(double& e1, +// double& n_0, +// double& n_1, +// double& tps_0, +// double& tps_1, +// const TypeModel typeModel, +// const double& totPopSize, +// const double& K){ +// // const double& initSize) { +// // static double tps_0 = initSize; +// // static double tps_1 = 0.0; + +// if( (typeModel == TypeModel::mcfarland0) || +// (typeModel == TypeModel::mcfarland) || +// (typeModel == TypeModel::mcfarlandlog) ) { +// double etmp; +// tps_1 = totPopSize; +// if(typeModel == TypeModel::mcfarland) +// etmp = std::abs( tps_1 - (tps_0 + 1) ); +// else { +// if( (tps_0 + 1.0) > tps_1 ) +// etmp = (K + tps_0 + 1.0)/(K + tps_1); +// else +// etmp = (K + tps_1)/(K + tps_0 + 1); +// } +// if(etmp > e1) { +// e1 = etmp; +// n_0 = tps_0; +// n_1 = tps_1; +// } +// tps_0 = tps_1; +// } +// } + + +// void updateRatesMcFarland(std::vector& popParams, +// double& adjust_fitness_MF, +// const double& K, +// const double& totPopSize){ + +// adjust_fitness_MF = totPopSize/K; + +// for(size_t i = 0; i < popParams.size(); ++i) { +// popParams[i].death = adjust_fitness_MF; +// W_f_st(popParams[i]); +// R_f_st(popParams[i]); +// } +// } + + +void updateRatesMcFarlandLog(std::vector& popParams, + double& adjust_fitness_MF, + const double& K, + const double& totPopSize){ + + // from original log(1 + totPopSize/K) + adjust_fitness_MF = log1p(totPopSize/K); + + for(size_t i = 0; i < popParams.size(); ++i) { + popParams[i].death = adjust_fitness_MF; + W_f_st(popParams[i]); + R_f_st(popParams[i]); + } +} + + +// // McFarland0 uses: - penalty as log(1 + N/K), and puts +// // that in the birth rate. +// void updateRatesMcFarland0(std::vector& popParams, +// double& adjust_fitness_MF, +// const double& K, +// const double& totPopSize, +// const int& mutationPropGrowth, +// const double& mu){ + +// adjust_fitness_MF = 1.0 / log1p(totPopSize/K); + +// for(size_t i = 0; i < popParams.size(); ++i) { +// popParams[i].birth = adjust_fitness_MF * popParams[i].absfitness; +// if(mutationPropGrowth) { +// popParams[i].mutation = mu * popParams[i].birth * +// popParams[i].numMutablePos; +// } else if(popParams[i].birth / popParams[i].mutation < 20) { +// Rcpp::Rcout << "\n WARNING: birth/mutation < 20"; +// Rcpp::Rcout << "\n Birth = " << popParams[i].birth +// << "; mutation = " << popParams[i].mutation << "\n"; +// } +// W_f_st(popParams[i]); +// R_f_st(popParams[i]); +// } +// } + +// void updateRatesBeeren(std::vector& popParams, +// double& adjust_fitness_B, +// const double& initSize, +// const double& currentTime, +// const double& alpha, +// const double& totPopSize, +// const int& mutationPropGrowth, +// const double& mu){ + +// double average_fitness = 0.0; // average_fitness in Zhu +// double weighted_sum_fitness = 0.0; +// double N_tilde; + +// for(size_t i = 0; i < popParams.size(); ++i) { +// weighted_sum_fitness += (popParams[i].absfitness * popParams[i].popSize); +// } + +// average_fitness = (1.0/totPopSize) * weighted_sum_fitness; +// N_tilde = initSize * exp(alpha * average_fitness * currentTime); +// adjust_fitness_B = N_tilde/weighted_sum_fitness; + +// if(adjust_fitness_B < 0) { +// throw std::range_error("adjust_fitness_B < 0"); +// } + +// for(size_t i = 0; i < popParams.size(); ++i) { +// popParams[i].birth = adjust_fitness_B * popParams[i].absfitness; +// if(mutationPropGrowth) { +// popParams[i].mutation = mu * popParams[i].birth * +// popParams[i].numMutablePos; +// } else if(popParams[i].birth / popParams[i].mutation < 20) { +// Rcpp::Rcout << "\n WARNING: birth/mutation < 20"; +// Rcpp::Rcout << "\n Birth = " << popParams[i].birth +// << "; mutation = " << popParams[i].mutation << "\n"; +// } +// W_f_st(popParams[i]); +// R_f_st(popParams[i]); +// } +// } + + + + + +void mapTimes_updateP(std::multimap& mapTimes, + std::vector& popParams, + const int index, + const double time) { + // Update the map times <-> indices + // Recall this is the map of nextMutationTime and index of species + // First, remove previous entry, then insert. + // But if we just created the species, nothing to remove from the map. + if(popParams[index].timeLastUpdate > -1) + mapTimes.erase(popParams[index].pv); + popParams[index].pv = mapTimes.insert(std::make_pair(time, index)); +} + + +void getMinNextMutationTime4(int& nextMutant, double& minNextMutationTime, + const std::multimap& mapTimes) { + // we want minNextMutationTime and nextMutant + nextMutant = mapTimes.begin()->second; + minNextMutationTime = mapTimes.begin()->first; +} + + +void fill_SStats(Rcpp::NumericMatrix& perSampleStats, + const std::vector& sampleTotPopSize, + const std::vector& sampleLargestPopSize, + const std::vector& sampleLargestPopProp, + const std::vector& sampleMaxNDr, + const std::vector& sampleNDrLargestPop){ + + for(size_t i = 0; i < sampleTotPopSize.size(); ++i) { + perSampleStats(i, 0) = sampleTotPopSize[i]; + perSampleStats(i, 1) = sampleLargestPopSize[i]; // Never used in R FIXME: remove!! + perSampleStats(i, 2) = sampleLargestPopProp[i]; // Never used in R + perSampleStats(i, 3) = static_cast(sampleMaxNDr[i]); + perSampleStats(i, 4) = static_cast(sampleNDrLargestPop[i]); + } +} + +// Do not use this routinely. Too expensive and not needed. +void detect_ti_duplicates(const std::multimap& m, + const double ti, + const int species) { + + double maxti = m.rbegin()->first; + if((ti < maxti) && (m.count(ti) > 1)) { + Rcpp::Rcout << "\n *** duplicated ti for species " << species << "\n"; + + std::multimap::const_iterator it = m.lower_bound(ti); + std::multimap::const_iterator it2 = m.upper_bound(ti); + + while(it != it2) { + Rcpp::Rcout << "\tgenotype: " << (it->second) << "; time: " << + (it->first) << "\n"; + ++it; + } + Rcpp::Rcout << "\n\n\n"; + } +} + + diff --git a/OncoSimulR/src-i386/bnb_common.h b/OncoSimulR/src-i386/bnb_common.h new file mode 100644 index 00000000..45bdbf13 --- /dev/null +++ b/OncoSimulR/src-i386/bnb_common.h @@ -0,0 +1,191 @@ +// Copyright 2013, 2014, 2015 Ramon Diaz-Uriarte + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + + + +#ifndef _BNB_COMMON_H_ +#define _BNB_COMMON_H_ + +#include +#include "common_classes.h" +#include "debug_common.h" +// #include "new_restrict.h" // for the TypeModel enum + +// // Simple custom exception for exceptions that lead to re-runs. +// class rerunExcept: public std::runtime_error { +// public: +// rerunExcept(const std::string &s) : +// std::runtime_error(s) {} +// }; + + +// struct spParamsP { +// double popSize; +// double birth; +// double death; +// double W; +// double R; +// double mutation; +// double timeLastUpdate; +// std::multimap::iterator pv; +// double absfitness; //convenient for Beerenwinkel +// int numMutablePos; //for mutator if need update of mutation +// }; + + +inline void W_f_st(spParamsP& spP){ + spP.W = spP.death + spP.birth + spP.mutation; +} + +inline void R_f_st(spParamsP& spP) { + spP.R = sqrt( pow( spP.birth - spP.death, 2) + + ( 2.0 * (spP.birth + spP.death) + + spP.mutation) * spP.mutation ); +} + + +inline double pE_f_st(double& pM, const spParamsP& spP){ + double pE = (spP.death * (1.0 - pM ) )/(spP.W - spP.death - spP.birth * pM ); + if( !std::isfinite(pE) ) { + DP2(spP.death); DP2(spP.birth); DP2(pM); DP2(spP.W); + DP2(spP.mutation); + std::string error_message = R"(pE.f: pE not finite. + This is expected to happen when mutationPropGrowth = TRUE + and you have have an initMutant with death >> birth, + as that inevitably leads to net birth rate of 0 + and mutation rate of 0)"; + throw std::range_error(error_message); + } + return pE; +} + +inline double pB_f_st(const double& pE, + const spParamsP& spP) { + return (spP.birth * pE)/spP.death; +} + +void mapTimes_updateP(std::multimap& mapTimes, + std::vector& popParams, + const int index, + const double time); + + +void getMinNextMutationTime4(int& nextMutant, double& minNextMutationTime, + const std::multimap& mapTimes); + + +void fill_SStats(Rcpp::NumericMatrix& perSampleStats, + const std::vector& sampleTotPopSize, + const std::vector& sampleLargestPopSize, + const std::vector& sampleLargestPopProp, + const std::vector& sampleMaxNDr, + const std::vector& sampleNDrLargestPop); + + +void print_spP(const spParamsP& spP); + +double pM_f_st(const double& t, const spParamsP& spP); + +double ti_nextTime_tmax_2_st(const spParamsP& spP, + const double& currentTime, + const double& tSample, + int& ti_dbl_min, + int& ti_e3); + +double Algo2_st(const spParamsP& spP, + const double& ti, + const int& mutationPropGrowth); + +double Algo3_st(const spParamsP& spP, const double& t); + +void precissionLoss(); + +void init_tmpP(spParamsP& tmpParam); + +// double returnMFE(double& e1, +// const std::string& typeFitness); + +// double returnMFE(double& e1, +// const TypeModel typeModel); + +double returnMFE_new(double& en1, + const std::string& typeFitness); + +double returnMFE_new(double& en1, + const TypeModel typeModel); + +// void computeMcFarlandError(double& e1, +// double& n_0, +// double& tps_0, +// const std::string& typeFitness, +// const double& totPopSize, +// const double& K); + +// void computeMcFarlandError(double& e1, +// double& n_0, +// double& tps_0, +// const TypeModel typeModel, +// const double& totPopSize, +// const double& K); + +void computeMcFarlandError_new(double& en1, + double& en1sc, + double& totPopSize_previous, + double& DA_previous, + const TypeModel typeModel, + const double& totPopSize, + const double& K); + +void computeMcFarlandError_new(double& en1, + double& en1sc, + double& totPopSize_previous, + double& DA_previous, + const std::string& typeFitness, + const double& totPopSize, + const double& K); + +void updateRatesMcFarland(std::vector& popParams, + double& adjust_fitness_MF, + const double& K, + const double& totPopSize); + +void updateRatesMcFarlandLog(std::vector& popParams, + double& adjust_fitness_MF, + const double& K, + const double& totPopSize); + + +void updateRatesMcFarland0(std::vector& popParams, + double& adjust_fitness_MF, + const double& K, + const double& totPopSize, + const int& mutationPropGrowth, + const double& mu); + +void updateRatesBeeren(std::vector& popParams, + double& adjust_fitness_B, + const double& initSize, + const double& currentTime, + const double& alpha, + const double& totPopSize, + const int& mutationPropGrowth, + const double& mu); + +void detect_ti_duplicates(const std::multimap& m, + const double ti, + const int spcies); + +#endif + diff --git a/OncoSimulR/src-i386/common_classes.h b/OncoSimulR/src-i386/common_classes.h new file mode 100644 index 00000000..82f05230 --- /dev/null +++ b/OncoSimulR/src-i386/common_classes.h @@ -0,0 +1,49 @@ +// Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + + + +#ifndef _COMMON_CLS_H_ +#define _COMMON_CLS_H_ + +#include + +// Simple custom exception for exceptions that lead to re-runs. +class rerunExcept: public std::runtime_error { +public: + rerunExcept(const std::string &s) : + std::runtime_error(s) {} +}; + +enum class TypeModel {exp, bozic1, mcfarlandlog}; + + +struct spParamsP { + double popSize; + double birth; + double death; + double W; + double R; + double mutation; + double timeLastUpdate; + std::multimap::iterator pv; + double absfitness; //convenient for Beerenwinkel + int numMutablePos; //for mutator if need update of mutation +}; + + + +#endif + diff --git a/OncoSimulR/src-i386/debug_common.h b/OncoSimulR/src-i386/debug_common.h new file mode 100644 index 00000000..fe10f17d --- /dev/null +++ b/OncoSimulR/src-i386/debug_common.h @@ -0,0 +1,61 @@ +#ifndef _DEBUG_COMMON_H__ +#define _DEBUG_COMMON_H__ + +#include + +// #define DEBUGZ +// #define DEBUGV +#define DEBUGW + +#define DP1(x) {Rcpp::Rcout << "\n DEBUG2: I am at " << x << std::endl;} +#define DP2(x) {Rcpp::Rcout << "\n DEBUG2: Value of " << #x << " = " << x << std::endl;} +#define DP3(x, t){ \ + Rcpp::Rcout <<"\n DEBUG2:" ; \ + for(int xut = 0; xut < t; ++xut) Rcpp::Rcout << "\t "; \ + Rcpp::Rcout << " I am at " << x << std::endl;} +#define DP4(x, t){ \ + Rcpp::Rcout <<"\n DEBUG2:" ; \ + for(int xut = 0; xut < t; ++xut) Rcpp::Rcout << "\t "; \ + Rcpp::Rcout << " Value of " << #x << " = " << x << std::endl; } + +/* void here(std::string x) { */ +/* Rcpp::Rcout << "\n DEBUG: HERE at " << x << std::endl; */ +/* } */ + + +// Windows compiler in BioC is pre 4.8.0, so no to_string +// From http://stackoverflow.com/a/5590404 +#define SSTR( x ) dynamic_cast< std::ostringstream & >( \ + ( std::ostringstream() << std::dec << x ) ).str() + + + + +#ifdef DEBUGW +#define ASSERT(x) { \ + if (! (x)) { \ + Rcpp::Rcout << "\n\nERROR!! Assertion " << #x << " failed\n"; \ + Rcpp::Rcout << " on line " << __LINE__ << "\n\n"; \ + } \ + } +#else +#define ASSERT(x); +#endif + + +#ifdef DEBUGW +#define STOPASSERT(x) { \ + if (! (x)) { \ + Rcpp::Rcout << "\n\nERROR!! Assertion " << #x << " failed\n"; \ + Rcpp::Rcout << " on line " << __LINE__ << std::endl; \ + throw std::out_of_range("STOPASSERT"); \ + } \ + } +#else +#define STOPASSERT(x); +#endif + + +#endif + + diff --git a/OncoSimulR/src-i386/new_restrict.cpp b/OncoSimulR/src-i386/new_restrict.cpp new file mode 100644 index 00000000..b92edba1 --- /dev/null +++ b/OncoSimulR/src-i386/new_restrict.cpp @@ -0,0 +1,1601 @@ +// Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + + +// #include "randutils.h" //Nope, until we have gcc-4.8 in Win; full C++11 +#include "debug_common.h" +#include "common_classes.h" +#include "new_restrict.h" +#include +#include +#include +#include +#include + + +using namespace Rcpp; +using std::vector; +using std::back_inserter; + + +std::string concatIntsString(const std::vector& ints, + const std::string sep = ", ") { + std::string strout; + std::string comma = ""; + for(auto const &g : ints) { + strout += (comma + std::to_string(g)); + comma = sep; + } + return strout; +} + + +double prodFitness(const std::vector& s) { + return accumulate(s.begin(), s.end(), 1.0, + [](double x, double y) {return (x * std::max(0.0, (1 + y)));}); +} + +// // This is a better idea? If yes, change code in nr_fitness so that +// // birth 0 is not extinction. +// double prodFitnessNegInf(std::vector s) { +// double f = 1.0; +// for(auto y : s) { +// if( y == -std::numeric_limits::infinity()) { +// return -std::numeric_limits::infinity(); +// } else { +// f *= std::max(0.0, (1 + y)); +// } +// } +// return f; +// } + +double prodDeathFitness(const std::vector& s) { + return accumulate(s.begin(), s.end(), 1.0, + [](double x, double y) {return (x * std::max(0.0, (1 - y)));}); +} + +double prodMuts(const std::vector& s) { + // From prodFitness + // return accumulate(s.begin(), s.end(), 1.0, + // [](double x, double y) {return (x * y);}); + return accumulate(s.begin(), s.end(), 1.0, + std::multiplies()); +} + + + +// new cPDetect +double set_cPDetect(const double n2, const double p2, + const double PDBaseline) { + return ( -log(1.0 - p2) * (PDBaseline / (n2 - PDBaseline)) ); +} + +// Next two are identical, except for scaling the k. Use the simplest. +double probDetectSize(const double n, const double cPDetect, + const double PDBaseline) { + if(n <= PDBaseline) { + return 0; + } else { + return (1 - exp( -cPDetect * ( (n - PDBaseline)/PDBaseline ) )); + } +} + + +// Former cpDetect mechanism + +// double set_cPDetect(const double n2, const double p2, +// const double PDBaseline) { +// return (-log(1.0 - p2)/(n2 - PDBaseline)); +// } + +// // Next two are identical, except for scaling the k. Use the simplest. +// double probDetectSize(const double n, const double cPDetect, +// const double PDBaseline) { +// if(n <= PDBaseline) { +// return 0; +// } else { +// return (1 - exp( -cPDetect * (n - PDBaseline))); +// } +// } + +// // double prob_exit_ratio(const double n, const double k, const double baseline) { +// // if(n <= baseline) { +// // return 0; +// // } else { +// // return (1 - exp( -c * ( (n - baseline)/baseline))); +// // } +// // } + +// is this detected, by the probability of detection as a function of size? +bool detectedSizeP(const double n, const double cPDetect, + const double PDBaseline, std::mt19937& ran_gen) { + if(cPDetect < 0) { + // As we OR, return false if this condition does not apply + return false; + } else { + std::uniform_real_distribution runif(0.0, 1.0); + double prob = probDetectSize(n, cPDetect, PDBaseline); + if(prob <= 0.0) return false; + if(runif(ran_gen) <= prob) { + return true; + } else { + return false; + } + } +} + + + +bool operator==(const Genotype& lhs, const Genotype& rhs) { + return (lhs.orderEff == rhs.orderEff) && + (lhs.epistRtEff == rhs.epistRtEff) && + (lhs.rest == rhs.rest) && + (lhs.flGenes == rhs.flGenes); +} + +// Added for completeness, but not used now +// bool operator<(const Genotype& lhs, const Genotype& rhs) { +// std::vector lh = genotypeSingleVector(lhs); +// std::vector rh = genotypeSingleVector(rhs); +// if( lh.size() < rh.size() ) return true; +// else if ( lh.size() > rh.size() ) return false; +// else { +// for(size_t i = 0; i != lh.size(); ++i) { +// if( lh[i] < rh[i] ) return true; +// } +// return false; +// } +// } + + +TypeModel stringToModel(const std::string& mod) { + if(mod == "exp") + return TypeModel::exp; + else if(mod == "bozic1") + return TypeModel::bozic1; + else if(mod == "mcfarlandlog") + return TypeModel::mcfarlandlog; + // else if(mod == "mcfarland") + // return TypeModel::mcfarland; + // else if(mod == "beerenwinkel") + // return TypeModel::beerenwinkel; + // else if(mod == "mcfarland0") + // return TypeModel::mcfarland0; + // else if(mod == "bozic2") + // return TypeModel::bozic2; + else + throw std::out_of_range("Not a valid TypeModel"); +} + + +Dependency stringToDep(const std::string& dep) { + if(dep == "monotone") // AND, CBN, CMPN + return Dependency::monotone; + else if(dep == "semimonotone") // OR, SMN, DMPN + return Dependency::semimonotone; + else if(dep == "xmpn") // XOR, XMPN + return Dependency::xmpn; + else if(dep == "--") // for root, for example + return Dependency::single; + else + throw std::out_of_range("Not a valid typeDep"); + // We never create the NA from entry data. NA is reserved for Root. +} + +// std::string depToString(const Dependency dep) { +// switch(dep) { +// case Dependency::monotone: +// return "CMPN or monotone"; +// case Dependency::semimonotone: +// return "DMPN or semimonotone"; +// case Dependency::xmpn: +// return "XMPN (XOR)"; +// case Dependency::single: +// return "--"; +// default: +// throw std::out_of_range("Not a valid dependency"); +// } +// } + + +void print_Genotype(const Genotype& ge) { + Rcpp::Rcout << "\n Printing Genotype"; + Rcpp::Rcout << "\n\t\t order effects genes:"; + for(auto const &oo : ge.orderEff) Rcpp::Rcout << " " << oo; + Rcpp::Rcout << "\n\t\t epistasis and restriction effects genes:"; + for(auto const &oo : ge.epistRtEff) Rcpp::Rcout << " " << oo; + Rcpp::Rcout << "\n\t\t non interaction genes :"; + for(auto const &oo : ge.rest) Rcpp::Rcout << " " << oo; + Rcpp::Rcout << "\n\t\t fitness landscape genes :"; + for(auto const &oo : ge.flGenes) Rcpp::Rcout << " " << oo; + Rcpp::Rcout << std::endl; +} + +vector genotypeSingleVector(const Genotype& ge) { + // orderEff in the order they occur. All others are sorted. + std::vector allgG; + allgG.insert(allgG.end(), ge.orderEff.begin(), ge.orderEff.end()); + allgG.insert(allgG.end(), ge.epistRtEff.begin(), ge.epistRtEff.end()); + allgG.insert(allgG.end(), ge.rest.begin(), ge.rest.end()); + allgG.insert(allgG.end(), ge.flGenes.begin(), ge.flGenes.end()); + // this should not be unique'd as it aint' sorted + return allgG; +} + + +vector allGenesinFitness(const fitnessEffectsAll& F) { + // Sorted + std::vector g0; + + if(F.Gene_Module_tabl.size()) { + if( F.Gene_Module_tabl[0].GeneNumID != 0 ) + throw std::logic_error("\n Gene module table's first element must be 0." + " This should have been caught in R."); + // for(vector::size_type i = 1; + // i != F.Gene_Module_tabl.size(); i++) { + for(decltype(F.Gene_Module_tabl.size()) i = 1; + i != F.Gene_Module_tabl.size(); i++) { + g0.push_back(F.Gene_Module_tabl[i].GeneNumID); + } + } + // for(auto const &a : F.Gene_Module_tabl) { + // if(a.GeneNumID != 0) g0.push_back(a.GeneNumID); + // } + for(auto const &b: F.genesNoInt.NumID) { + g0.push_back(b); + } + // sort(g0.begin(), g0.end()); + for(auto const &b: F.fitnessLandscape.NumID) { + g0.push_back(b); + } + sort(g0.begin(), g0.end()); + + + // Can we assume the fitness IDs go from 0 to n? Nope: because of + // muEF. But we assume in several places that there are no repeated + // elements in the output from this function. + + // FIXME we verify there are no repeated elements. That is to strongly + // check our assumptions are right. Alternatively, return the "uniqued" + // vector and do not check anything. + std::vector g0_cp(g0); + g0.erase( unique( g0.begin(), g0.end() ), g0.end() ); + if(g0.size() != g0_cp.size()) + throw std::logic_error("\n allGenesinFitness: repeated genes. " + " This should have been caught in R."); + return g0; +} + +vector allGenesinGenotype(const Genotype& ge){ + // Like genotypeSingleVector, but sorted + std::vector allgG; + for(auto const &g1 : ge.orderEff) + allgG.push_back(g1); + for(auto const &g2 : ge.epistRtEff) + allgG.push_back(g2); + for(auto const &g3 : ge.rest) + allgG.push_back(g3); + for(auto const &g4 : ge.flGenes) + allgG.push_back(g4); + + sort(allgG.begin(), allgG.end()); + // Remove duplicates see speed comparisons here: + // http://stackoverflow.com/questions/1041620/whats-the-most-efficient-way-to-erase-duplicates-and-sort-a-vector + // We assume here there are no duplicates. Yes, a gene can have both + // fitness effects and order effects and be in the DAG. But it will be + // in only one of the buckets. + std::vector g0_cp(allgG); + allgG.erase( unique( allgG.begin(), allgG.end() ), allgG.end() ); + if(allgG.size() != g0_cp.size()) + throw std::logic_error("\n allGenesinGenotype: repeated genes." + " This should have been caught in R."); + return allgG; +} + + +// For users: if something depends on 0, that is it. No further deps. +// And do not touch the 0 in Gene_Module_table. +std::vector rTable_to_Poset(Rcpp::List rt) { + + // The restriction table, or Poset, has a first element + // with nothing, so that all references by mutated gene + // are simply accessing the Poset[mutated gene] without + // having to remember to add 1, etc. + + std::vector Poset; + + Poset.resize(rt.size() + 1); + Poset[0].child = "0"; //should this be Root?? I don't think so. + Poset[0].childNumID = 0; + Poset[0].typeDep = Dependency::NA; + Poset[0].s = std::numeric_limits::quiet_NaN(); + Poset[0].sh = std::numeric_limits::quiet_NaN(); + Poset[0].parents.resize(0); + Poset[0].parentsNumID.resize(0); + + Rcpp::List rt_element; + // std::string tmpname; + Rcpp::IntegerVector parentsid; + Rcpp::CharacterVector parents; + + for(int i = 1; i != (rt.size() + 1); ++i) { + rt_element = rt[i - 1]; + Poset[i].child = Rcpp::as(rt_element["child"]); + Poset[i].childNumID = as(rt_element["childNumID"]); + Poset[i].typeDep = stringToDep(as(rt_element["typeDep"])); + Poset[i].s = as(rt_element["s"]); + Poset[i].sh = as(rt_element["sh"]); + + // if(i != Poset[i].childNumID) { + // Nope: this assumes we only deal with posets. + // // Rcpp::Rcout << "\n childNumID, original = " << as(rt_element["childNumID"]); + // // Rcpp::Rcout << "\n childNumID, Poset = " << Poset[i].childNumID; + // // Rcpp::Rcout << "\n i = " << i << std::endl; + // throw std::logic_error("childNumID != index"); + // } + // Rcpp::IntegerVector parentsid = as(rt_element["parentsNumID"]); + // Rcpp::CharacterVector parents = as(rt_element["parents"]); + + parentsid = as(rt_element["parentsNumID"]); + parents = as(rt_element["parents"]); + + if( parentsid.size() != parents.size() ) { + throw std::logic_error("parents size != parentsNumID size. Bug in R code."); + } + + for(int j = 0; j != parentsid.size(); ++j) { + Poset[i].parentsNumID.push_back(parentsid[j]); + Poset[i].parents.push_back( (Rcpp::as< std::string >(parents[j])) ); + // tmpname = Rcpp::as< std::string >(parents[j]); + // Poset[i].parents.push_back(tmpname); + } + + // Should not be needed if R always does what is should. Disable later? + if(! is_sorted(Poset[i].parentsNumID.begin(), Poset[i].parentsNumID.end()) ) + throw std::logic_error("ParentsNumID not sorted. Bug in R code."); + + if(std::isinf(Poset[i].s)) + Rcpp::Rcout << "WARNING: at least one s is infinite" + << std::endl; + if(std::isinf(Poset[i].sh) && (Poset[i].sh > 0)) + Rcpp::Rcout << "WARNING: at least one sh is positive infinite" + << std::endl; + } + return Poset; +} + + +std::vector R_GeneModuleToGeneModule(Rcpp::List rGM) { + + std::vector geneModule; + + Rcpp::IntegerVector GeneNumID = rGM["GeneNumID"]; + Rcpp::IntegerVector ModuleNumID = rGM["ModuleNumID"]; + Rcpp::CharacterVector GeneName = rGM["Gene"]; + Rcpp::CharacterVector ModuleName = rGM["Module"]; + // geneModule.resize(GeneNumID.size()); + geneModule.resize(GeneNumID.size()); // remove later + + for(size_t i = 0; i != geneModule.size(); ++i) { + if( static_cast(i) != GeneNumID[i]) + throw std::logic_error(" i != GeneNumID. Bug in R code."); + // geneModule[i].GeneNumID = GeneNumID[i]; + // geneModule[i].ModuleNumID = ModuleNumID[i]; + // remove these later? + geneModule[i].GeneNumID = GeneNumID[i]; + geneModule[i].ModuleNumID = ModuleNumID[i]; + geneModule[i].GeneName = GeneName[i]; + geneModule[i].ModuleName = ModuleName[i]; + } + + return geneModule; +} + + +std::vector GeneToModule(const std::vector& Drv, + const + std::vector& Gene_Module_tabl, + const bool sortout, const bool uniqueout) { + + std::vector mutatedModules; + + for(auto it = Drv.begin(); it != Drv.end(); ++it) { + mutatedModules.push_back(Gene_Module_tabl[(*it)].ModuleNumID); + } + // sortout and uniqueout returns a single element of each. uniqueout only removes + // successive duplicates. sortout without unique is just useful for knowing + // what happens for stats, etc. Neither sortout nor uniqueout for keeping + // track of order of module events. + if(sortout) { + sort( mutatedModules.begin(), mutatedModules.end() ); + } + if(uniqueout) { + mutatedModules.erase( unique( mutatedModules.begin(), + mutatedModules.end() ), + mutatedModules.end() ); + } + return mutatedModules; +} + + +// fitnessLandscape_struct convertFitnessLandscape(Rcpp::List flg, +// Rcpp::DataFrame fl_df) { +fitnessLandscape_struct convertFitnessLandscape(Rcpp::List flg, + Rcpp::List fl_df) { + + fitnessLandscape_struct flS; + flS.names = Rcpp::as >(flg["Gene"]); + flS.NumID = Rcpp::as >(flg["GeneNumID"]); + + std::vector genotNames = + Rcpp::as >(fl_df["Genotype"]); + // Rcpp::CharacterVector genotNames = fl_df["Genotype"]; + Rcpp::NumericVector fitness = fl_df["Fitness"]; + + // Fill up the map genotypes(as string) to fitness + //for(size_t i = 0; i != fl_df.nrows(); ++i) { + for(size_t i = 0; i != genotNames.size(); ++i) { + flS.flmap.insert({genotNames[i], fitness[i]}); + } + + return flS; +} + +genesWithoutInt convertNoInts(Rcpp::List nI) { + + genesWithoutInt genesNoInt; + // FIXME: I think I want to force Gene in long.geneNoInt to be a char.vector + // to avoid this transformation. + // Rcpp::CharacterVector names = nI["Gene"]; + // Rcpp::IntegerVector id = nI["GeneNumID"]; + // Rcpp::NumericVector s1 = nI["s"]; + + // genesNoInt.names = Rcpp::as >(names); + genesNoInt.names = Rcpp::as >(nI["Gene"]); + genesNoInt.NumID = Rcpp::as >(nI["GeneNumID"]); + genesNoInt.s = Rcpp::as >(nI["s"]); + genesNoInt.shift = genesNoInt.NumID[0]; // FIXME: we assume mutations always + // indexed 1 to something. Not 0 to + // something. + return genesNoInt; +} + + +std::vector convertEpiOrderEff(Rcpp::List ep) { + + std::vector Epistasis; + + Rcpp::List element; + // For epistasis, the numID must be sorted, but never with order effects. + // Things come sorted (or not) from R. + Epistasis.resize(ep.size()); + for(int i = 0; i != ep.size(); ++i) { + element = ep[i]; + Epistasis[i].NumID = Rcpp::as >(element["NumID"]); + Epistasis[i].names = Rcpp::as >(element["ids"]); + Epistasis[i].s = as(element["s"]); + } + return Epistasis; +} + +std::vector sortedAllOrder(const std::vector& E) { + + std::vector allG; + for(auto const &ec : E) { + for(auto const &g : ec.NumID) { + allG.push_back(g); + } + } + sort(allG.begin(), allG.end()); + allG.erase( unique( allG.begin(), allG.end()), + allG.end()); + return allG; +} + +std::vector sortedAllPoset(const std::vector& Poset) { + // Yes, this could be done inside rTable_to_Poset but this is cleaner + // and will only add very little time. + std::vector allG; + for(auto const &p : Poset) { + allG.push_back(p.childNumID); + } + sort(allG.begin(), allG.end()); + allG.erase( unique( allG.begin(), allG.end()), + allG.end()); + return allG; +} + +fitnessEffectsAll convertFitnessEffects(Rcpp::List rFE) { + // Yes, some of the things below are data.frames in R, but for + // us that is used just as a list. + + fitnessEffectsAll fe; + + Rcpp::List rrt = rFE["long.rt"]; + Rcpp::List re = rFE["long.epistasis"]; + Rcpp::List ro = rFE["long.orderEffects"]; + Rcpp::List rgi = rFE["long.geneNoInt"]; + Rcpp::List rgm = rFE["geneModule"]; + bool rone = as(rFE["gMOneToOne"]); + Rcpp::IntegerVector drv = rFE["drv"]; + + Rcpp::List flg = rFE["fitnessLandscape_gene_id"]; + // clang does not like this + // Rcpp::DataFrame fl_df = rFE["fitnessLandscape_df"]; + Rcpp::List fl_df = rFE["fitnessLandscape_df"]; + + + + + // In the future, if we want noInt and fitnessLandscape, all + // we need is use the fitness landscape with an index smaller than those + // of noInt. So we can use noInt with shift being those in fitnessLandscape. + // BEWARE: will need to modify also createNewGenotype. + + // if(fl_df.nrows()) { + if(fl_df.size()) { + fe.fitnessLandscape = convertFitnessLandscape(flg, fl_df); + } + + if(rrt.size()) { + fe.Poset = rTable_to_Poset(rrt); + } + if(re.size()) { + fe.Epistasis = convertEpiOrderEff(re); + } + if(ro.size()) { + fe.orderE = convertEpiOrderEff(ro); + } + if(rgi.size()) { + fe.genesNoInt = convertNoInts(rgi); + } else { + fe.genesNoInt.shift = -9L; + } + // If this is null, use the nullFitnessEffects function; never + // end up here. + + // if( (rrt.size() + re.size() + ro.size() + rgi.size() + fl_df.nrows()) == 0) { + if( (rrt.size() + re.size() + ro.size() + rgi.size() + fl_df.size()) == 0) { + throw std::logic_error("\n Nothing inside this fitnessEffects; why are you here?" + " Bug in R code."); + } + + // At least for now, if fitness landscape nothing else allowed + // if(fl_df.nrows() && ((rrt.size() + re.size() + ro.size() + rgi.size()) > 0)) { + if(fl_df.size() && ((rrt.size() + re.size() + ro.size() + rgi.size()) > 0)) { + throw std::logic_error("\n Fitness landscape specification." + " There should be no other terms. " + " Bug in R code"); + } + + // This is silly + // if(fl_df.nrows() && (rgm.size() > 4) ) { + // throw std::logic_error("\n Fitness landscape specification." + // " Cannot use modules. " + // " Bug in R code"); + // } + + fe.Gene_Module_tabl = R_GeneModuleToGeneModule(rgm); + fe.allOrderG = sortedAllOrder(fe.orderE); + fe.allPosetG = sortedAllPoset(fe.Poset); + fe.gMOneToOne = rone; + fe.allGenes = allGenesinFitness(fe); + fe.genomeSize = fe.Gene_Module_tabl.size() - 1 + fe.genesNoInt.s.size() + + fe.fitnessLandscape.NumID.size(); + fe.drv = as > (drv); + sort(fe.drv.begin(), fe.drv.end()); //should not be needed, but just in case + // cannot trust R gives it sorted + // check_disable_later + if(fe.genomeSize != static_cast(fe.allGenes.size())) { + throw std::logic_error("\n genomeSize != allGenes.size(). Bug in R code."); + } + // At least for now + if(fe.fitnessLandscape.NumID.size() > 0) { + if(fe.genomeSize != static_cast(fe.fitnessLandscape.NumID.size())) { + throw std::logic_error("\n genomeSize != genes in fitness landscape." + "Bug in R code."); + } + } + + return fe; +} + +// Before making allGenesinGenotype return a unique vector: we do a +// set_difference below. If we look at the help +// (http://en.cppreference.com/w/cpp/algorithm/set_difference) if we had +// more repetitions of an element in allGenes than in sortedparent we +// could have a problem. But if you look at function "allgenesinFitness", +// which is the one used to give the allgenes vector, you will see that +// that one returns only one entry per gene, as it parses the geneModule +// structure. So even if allGenesinGenotype returns multiple entries +// (which they don't), there will be no bugs as the maximum number of +// entries in the output of setdiff will be 0 or 1 as m is 1. But +// allGenesinGenotype cannot return more than one element as can be seen +// in createNewGenotype: an element only ends in the order component if it +// is not in the epistasis component. So there are no repeated elements +// in allGenes or in sortedparent below. Also, beware that does not break +// correct fitness evaluation of fitnessEffects where the same gene is in +// epistasis and order, as can be seen in the tests and because of how we +// evaluate fitness, where genes in a genotype in the orderEffects bucket +// are placed also in the epist for fitness eval. See evalGenotypeFitness +// and createNewGenotype. +void obtainMutations(const Genotype& parent, + const fitnessEffectsAll& fe, + int& numMutablePosParent, + std::vector& newMutations, + //randutils::mt19937_rng& ran_gen + std::mt19937& ran_gen, + std::vector mu) { + //Ugly: we return the mutations AND the numMutablePosParent This is + // almost ready to accept multiple mutations. And it returns a vector, + // newMutations. + std::vector sortedparent = allGenesinGenotype(parent); + std::vector nonmutated; + set_difference(fe.allGenes.begin(), fe.allGenes.end(), + sortedparent.begin(), sortedparent.end(), + back_inserter(nonmutated)); + // numMutablePos is used not only for mutation but also to decide about + // the dummy or null mutation case. + numMutablePosParent = nonmutated.size(); + if(nonmutated.size() < 1) + throw std::out_of_range("Trying to obtain a mutation when nonmutated.size is 0." + " Bug in R code; let us know."); + if(mu.size() == 1) { // common mutation rate + // FIXME:chromothr would not use this, or this is the limit case with a + // single mutant + std::uniform_int_distribution rpos(0, nonmutated.size() - 1); + newMutations.push_back(nonmutated[rpos(ran_gen)]); + } else { // per-gene mutation rate. + // Remember that mutations always indexed from 1, not from 0. + // We take an element from a discrete distribution, with probabilities + // proportional to the rates. + // FIXME:varmutrate give a warning if the only mu is for mu = 0. + std::vector mu_nm; + for(auto const &nm : nonmutated) mu_nm.push_back(mu[nm - 1]); + std::discrete_distribution rpos(mu_nm.begin(), mu_nm.end()); + newMutations.push_back(nonmutated[rpos(ran_gen)]); + } + // randutils + // // Yes, the next will work, but pick is simpler! + // // size_t rpos = ran_gen.uniform(static_cast(0), nonmutated.size() - 1); + // // newMutations.push_back(nonmutated[rpos]); + // int posmutated = ran_gen.pick(nonmutated); + // newMutations.push_back(posmutated); +} + + +// std::vector genesInOrderModules(const fitnessEffectsAll& fe) { +// vector genes; +// if(fe.gMOneToOne) +// genes = fe.alOrderG; +// else { +// for(auto const &m : fe.allOrderG) { +// genes.push_back() +// } + +// } +// return genes; +// } + + +fitness_as_genes fitnessAsGenes(const fitnessEffectsAll& fe) { + // Give the fitnessEffects in terms of genes, not modules. + + // Extract the noInt. Then those in order effects by creating a multimap + // to go from map to genes. Then all remaining genes are those only in + // poset. By set_difference. + fitness_as_genes fg = zero_fitness_as_genes(); + + // fitness_as_genes fg; + fg.flGenes = fe.fitnessLandscape.NumID; + if(fg.flGenes.size()) { + return fg; + } + + fg.noInt = fe.genesNoInt.NumID; + // //zz: debugging + // for(auto const &o : fe.genesNoInt.NumID) { + // DP2(o); + // } + // for(auto const &o : fe.genesNoInt.names) { + // DP2(o); + // } + // // + std::multimap MG; + for( auto const &mt : fe.Gene_Module_tabl) { + MG.insert({mt.ModuleNumID, mt.GeneNumID}); + } + for (auto const &o : fe.allOrderG) { + for(auto pos = MG.lower_bound(o); pos != MG.upper_bound(o); ++pos) + fg.orderG.push_back(pos->second); + } + sort(fg.orderG.begin(), fg.orderG.end()); + + std::vector tmpv = fg.orderG; + tmpv.insert(tmpv.end(),fg.noInt.begin(), fg.noInt.end()); + sort(tmpv.begin(), tmpv.end()); // should not be needed + + set_difference(fe.allGenes.begin(), fe.allGenes.end(), + tmpv.begin(), tmpv.end(), + back_inserter(fg.posetEpistG)); + // fg.posetEpistG.sort(fg.posetEpistG.begin(), + // fg.posetEpistG.end()); + + // // //zz: debugging + // DP1("order"); + // for(auto const &o : fg.orderG) { + // DP2(o); + // } + // DP1("posetEpist"); + // for(auto const &o : fg.posetEpistG) { + // DP2(o); + // } + // DP1("noint"); + // for(auto const &o : fg.noInt) { + // DP2(o); + // } + // DP1("fl"); + // for(auto const &o : fg.flGenes) { + // DP2(o); + // } + + + return fg; +} + + +std::map mapGenesIntToNames(const fitnessEffectsAll& fe) { + // This is a convenience, used in the creation of output. + // Sure, we could do this when reading the data in. + // The noInt in convertNoInts. + std::map gg; + + for(auto const &mt : fe.Gene_Module_tabl) { + gg.insert({mt.GeneNumID, mt.GeneName}); + } + // this is pedantic, as what is the size_type of NumID and of names? + // for(decltype(fe.genesNoInt.s.size()) i = 0; + // i != fe.genesNoInt.s.size(); ++i) + + for(size_t i = 0; + i != fe.genesNoInt.NumID.size(); ++i){ + gg.insert({fe.genesNoInt.NumID[i], fe.genesNoInt.names[i]}); + } + + // zz + for(size_t i = 0; + i != fe.fitnessLandscape.NumID.size(); ++i){ + gg.insert({fe.fitnessLandscape.NumID[i], fe.fitnessLandscape.names[i]}); + } + + + return gg; +} + +// It is simple to write specialized functions for when +// there are no restrictions or no order effects , etc. +Genotype createNewGenotype(const Genotype& parent, + const std::vector& mutations, + const fitnessEffectsAll& fe, + std::mt19937& ran_gen, + //randutils::mt19937_rng& ran_gen + bool random = true) { + // random: if multiple mutations, randomly shuffle the ordered ones? + // This is the way to go if chromothripsis, but not if we give an + // initial mutant + + Genotype newGenot = parent; + std::vector tempOrder; // holder for multiple muts if order. + bool sort_rest = false; + bool sort_epist = false; + bool sort_flgenes = false; + + // FIXME: we need to create the mutations! + + // Order of ifs: I suspect order effects rare. No idea about + // non-interaction genes, but if common the action is simple. + + // A gene that is involved both in order effects and epistasis, only + // ends up in the orderEff container, for concision (even if a gene can + // be involved in both orderEff and epistasis and rT). But in the + // genotype evaluation, in evalGenotypeFitness, notice that we create + // the vector of genes to be checked against epistais and order effects + // using also those from orderEff: + // std::vector mutG (ge.epistRtEff); + // mutG.insert( mutG.end(), ge.orderEff.begin(), ge.orderEff.end()); + for(auto const &g : mutations) { + // If we are dealing with a fitness landscape, that is as far as we go here + // at least for now. No other genes affect fitness. + // But this can be easily fixed in the future; like this? + // if(g <= (fe.fitnessLandscape.NumID.size() + 1)) { + // and restructure the else logic for the noInt + if(fe.fitnessLandscape.NumID.size()) { + newGenot.flGenes.push_back(g); + sort_flgenes = true; + } else { + if( (fe.genesNoInt.shift < 0) || (g < fe.genesNoInt.shift) ) { // Gene with int + // We can be dealing with modules + int m; + if(fe.gMOneToOne) { + m = g; + } else { + m = fe.Gene_Module_tabl[g].ModuleNumID; + } + if( !binary_search(fe.allOrderG.begin(), fe.allOrderG.end(), m) ) { + newGenot.epistRtEff.push_back(g); + sort_epist = true; + } else { + tempOrder.push_back(g); + } + } else { + // No interaction genes so no module stuff + newGenot.rest.push_back(g); + sort_rest = true; + } + } + } + + // If there is order but multiple simultaneous mutations + // (chromothripsis), we randomly insert them + + // FIXME: initMutant cannot use this!! we give the order!!! + if( (tempOrder.size() > 1) && random) + shuffle(tempOrder.begin(), tempOrder.end(), ran_gen); + // The new randutils engine: + // if(tempOrder.size() > 1) + // ran_gen.shuffle(tempOrder.begin(), tempOrder.end()); + + + for(auto const &g : tempOrder) + newGenot.orderEff.push_back(g); + + // Sorting done at end, in case multiple mutations + if(sort_rest) + sort(newGenot.rest.begin(), newGenot.rest.end()); + if(sort_epist) + sort(newGenot.epistRtEff.begin(), newGenot.epistRtEff.end()); + if(sort_flgenes) + sort(newGenot.flGenes.begin(), newGenot.flGenes.end()); + return newGenot; +} + +// FIXME: Prepare specialized functions: +// Specialized functions: +// Never interactions: push into rest and sort. Identify by shift == 1. +// Never no interactions: remove the if. shift == -9. + + + + +// A paranoid check in case R code has bugs. +void breakingGeneDiff(const vector& genotype, + const vector& fitness) { + std::vector diffg; + + set_difference(genotype.begin(), genotype.end(), + fitness.begin(), fitness.end(), + back_inserter(diffg)); + if(diffg.size()) { + Rcpp::Rcout << "Offending genes :"; + for(auto const &gx : diffg) { + Rcpp::Rcout << " " << gx; + } + Rcpp::Rcout << "\t Genotype: "; + for(auto const &g1 : genotype) Rcpp::Rcout << " " << g1; + Rcpp::Rcout << "\t Fitness: "; + for(auto const &g1 : fitness) Rcpp::Rcout << " " << g1; + + Rcpp::Rcout << "\n "; + throw std::logic_error("\n At least one gene in the genotype not in fitness effects." + " Bug in R code."); + + } +} + +void checkNoNegZeroGene(const vector& ge) { + if( ge[0] == 0 ) + throw std::logic_error("\n Genotype cannot contain 0. Bug in R code."); + else if(ge[0] < 0) + throw std::logic_error("\n Genotype cannot contain negative values. Bug in R code."); +} + +void checkLegitGenotype(const Genotype& ge, + const fitnessEffectsAll& F) { + if((ge.orderEff.size() + ge.epistRtEff.size() + ge.rest.size()) == 0) { + // An empty genotype is always legitimate, even if silly + return; + } + // vector g0 = allGenesinFitness(F); + vector g0 = F.allGenes; + vector allgG = allGenesinGenotype(ge); + checkNoNegZeroGene(allgG); + breakingGeneDiff(allgG, g0); +} + +void checkLegitGenotype(const vector& ge, + const fitnessEffectsAll& F) { + if (ge.size() == 0) { + // An empty genotype is always legitimate, even if silly + return; + } + // std::vector g0 = allGenesinFitness(F); + vector g0 = F.allGenes; + std::vector allgG (ge); + sort(allgG.begin(), allgG.end()); + checkNoNegZeroGene(allgG); + breakingGeneDiff(allgG, g0); +} + + + +Genotype convertGenotypeFromInts(const std::vector& gg, + const fitnessEffectsAll& fe) { + // A genotype is of one kind or another depending on what genes are of + // what type. + Genotype newGenot; + + if(gg.size() != 0) { + // check_disable_later + checkLegitGenotype(gg, fe); + + // Very similar to logic in createNewGenotype for placing each gene in + // its correct place, which needs to look at module mapping. + for(auto const &g : gg) { + if(fe.fitnessLandscape.NumID.size()) { + newGenot.flGenes.push_back(g); + } else { + if( (fe.genesNoInt.shift < 0) || (g < fe.genesNoInt.shift) ) { // Gene with int + // We can be dealing with modules + int m; + if(fe.gMOneToOne) { + m = g; + } else { + m = fe.Gene_Module_tabl[g].ModuleNumID; + } + if( !binary_search(fe.allOrderG.begin(), fe.allOrderG.end(), m) ) { + newGenot.epistRtEff.push_back(g); + } else { + newGenot.orderEff.push_back(g); + } + } else { + // No interaction genes so no module stuff + newGenot.rest.push_back(g); + } + } + } + + sort(newGenot.flGenes.begin(), newGenot.flGenes.end()); + sort(newGenot.rest.begin(), newGenot.rest.end()); + sort(newGenot.epistRtEff.begin(), newGenot.epistRtEff.end()); + } else { + newGenot = wtGenotype(); // be explicit!! + } + return newGenot; +} + + +Genotype convertGenotypeFromR(Rcpp::IntegerVector rG, + const fitnessEffectsAll& fe) { + std::vector gg = Rcpp::as > (rG); + return convertGenotypeFromInts(gg, fe); +} + + +// Previous version +// Genotype convertGenotypeFromR(Rcpp::IntegerVector rG, +// const fitnessEffectsAll& fe) { +// // A genotype is of one kind or another depending on what genes are of +// // what type. + +// std::vector gg = Rcpp::as > (rG); +// Genotype newGenot; + +// // check_disable_later +// checkLegitGenotype(gg, fe); + + +// // Very similar to logic in createNewGenotype for placing each gene in +// // its correct place, which needs to look at module mapping. +// for(auto const &g : gg) { +// if( (fe.genesNoInt.shift < 0) || (g < fe.genesNoInt.shift) ) { // Gene with int +// // We can be dealing with modules +// int m; +// if(fe.gMOneToOne) { +// m = g; +// } else { +// m = fe.Gene_Module_tabl[g].ModuleNumID; +// } +// if( !binary_search(fe.allOrderG.begin(), fe.allOrderG.end(), m) ) { +// newGenot.epistRtEff.push_back(g); +// } else { +// newGenot.orderEff.push_back(g); +// } +// } else { +// // No interaction genes so no module stuff +// newGenot.rest.push_back(g); +// } +// } + +// sort(newGenot.rest.begin(), newGenot.rest.end()); +// sort(newGenot.epistRtEff.begin(), newGenot.epistRtEff.end()); + +// return newGenot; +// } + + +// Genotype convertGenotypeFromR(Rcpp::List rGE) { + +// Genotype g; + +// Rcpp::IntegerVector oe = rGE["orderEffGenes"]; +// Rcpp::IntegerVector ert = rGE["epistRTGenes"]; +// Rcpp::IntegerVector rest = rGE["noInteractionGenes"]; + +// g.orderEff = Rcpp::as > (oe); +// g.epistRtEff = Rcpp::as > (ert); +// g.rest = Rcpp::as > (rest); +// sort(g.epistRtEff.begin(), g.epistRtEff.end()); +// sort(g.rest.begin(), g.rest.end()); + +// return g; +// } + + + + +bool match_order_effects(const std::vector& O, + const std::vector& G) { + //As the name says: we check if the order effect is matched + if(G.size() < O.size()) return false; + + std::vector::const_iterator p; + std::vector vdist; + + auto itb = G.begin(); + + for(auto const &o : O) { + p = find(G.begin(), G.end(), o); + if( p == G.end() ) { + return false; + } else { + vdist.push_back(std::distance( itb, p )); + } + } + // Rcpp::Rcout << " "; + // for(auto vv : vdist ) { + // Rcpp::Rcout << vv << " "; + // } + // Rcpp::Rcout << std:: endl; + if( is_sorted(vdist.begin(), vdist.end()) ) { + return true; + } else { + return false; + } +} + +std::vector evalOrderEffects(const std::vector& mutatedM, + const std::vector& OE) { + std::vector s; + for(auto const &o : OE) { + if(match_order_effects(o.NumID, mutatedM)) + s.push_back(o.s); + } + return s; +} + + +bool match_negative_epist(const std::vector& E, + const std::vector& G) { + // When we have things like -1, 2 in epistasis. We need to check 2 is + // present and 1 is not present. E is the vector of epistatic coeffs, + // and G the sorted genotype. + + if(G.size() < 1) return false; + + for(auto const &e : E) { + if(e < 0) { + if(binary_search(G.begin(), G.end(), -e)) + return false; + } else { + if(!binary_search(G.begin(), G.end(), e)) + return false; + } + } + return true; +} + + +std::vector evalEpistasis(const std::vector& mutatedModules, + const std::vector& Epistasis) { + std::vector s; + + // check_disable_later + if(! is_sorted(mutatedModules.begin(), mutatedModules.end())) + throw std::logic_error("mutatedModules not sorted in evalEpistasis." + " Bug in R code."); + + for(auto const &p : Epistasis ) { + if(p.NumID[0] > 0 ) { + if(includes(mutatedModules.begin(), mutatedModules.end(), + p.NumID.begin(), p.NumID.end())) + s.push_back(p.s); + } else { + if(match_negative_epist(p.NumID, mutatedModules)) + s.push_back(p.s); + } + } + // An alternative, but more confusing way + // for(auto const &p : Epistasis ) { + // if(p.NumID[0] > 0 ) { + // if(!includes(mutatedModules.begin(), mutatedModules.end(), + // p.NumID.begin(), p.NumID.end())) + // continue; + // } else { + // if(!match_negative_epist(p.NumID, mutatedModules)) + // continue; + // } + // s.push_back(p.s); + // } + return s; +} + +// FIXME: can we make it faster if we know each module a single gene? +// FIXME: if genotype is always kept sorted, with drivers first, can it be +// faster? As well, note that number of drivers is automatically known +// from this table of constraints. + +// int getPosetByChild(const int child, +// const std::vector& Poset) { + +// } + +std::vector evalPosetConstraints(const std::vector& mutatedModules, + const std::vector& Poset, + const std::vector& allPosetG) { + + // check_disable_later + if(! is_sorted(mutatedModules.begin(), mutatedModules.end())) + throw std::logic_error("mutatedModules not sorted in evalPosetConstraints." + " Bug in R code."); + + size_t numDeps; + size_t sumDepsMet = 0; + Dependency deptype; + std::vector parent_matches; + std::vector s; + + //This works reverted w.r.t. to evalOrderEffects and evalEpistasis: + //there, I examine if the effect is present in the genotype. Here, I + //examine if the genotype satisfies the constraints. + + + // Since the genotype can contain genes not in the poset, first find + // those that are mutated in the genotype AND are in the poset. Then + // check if the mutated have restrictions satisfied. + + std::vector MPintersect; + + + std::set_intersection(allPosetG.begin(), allPosetG.end(), + mutatedModules.begin(), mutatedModules.end(), + std::back_inserter(MPintersect)); + + // We know MPintersect is sorted, so we can avoid an O(n*n) loop + size_t i = 0; + for(auto const &m : MPintersect) { + while ( Poset[i].childNumID != m) ++i; + // Not to catch the twisted case of an XOR with a 0 and something else + // as parents + if( (Poset[i].parentsNumID[0] == 0) && + (Poset[i].parentsNumID.size() == 1)) { + s.push_back(Poset[i].s); + } else { + parent_matches.clear(); + std::set_intersection(mutatedModules.begin(), mutatedModules.end(), + Poset[i].parentsNumID.begin(), + Poset[i].parentsNumID.end(), + back_inserter(parent_matches)); + sumDepsMet = parent_matches.size(); + numDeps = Poset[i].parentsNumID.size(); + deptype = Poset[i].typeDep; + + if( ((deptype == Dependency::semimonotone) && + (sumDepsMet)) || + ((deptype == Dependency::monotone) && + (sumDepsMet == numDeps)) || + ((deptype == Dependency::xmpn) && + (sumDepsMet == 1)) ) { + s.push_back(Poset[i].s); + } else { + s.push_back(Poset[i].sh); + } + } + } + // for(auto const &parent : Poset[i].parentsNumID ) { + // parent_module_mutated = binary_search(mutatedModules.begin(), + // mutatedModules.end(), + // parent); + // if(parent_module_mutated) { + // ++sumDepsMet; + // if( deptype == Dependency::semimonotone ) { + // known = true; + // break; + // } + // if( (deptype == Dependency::xmpn && (sumDepsMet > 1))) { + // knwon = true; + // break; + // } + // } + // } + return s; +} + + + + +std::vector evalGenotypeFitness(const Genotype& ge, + const fitnessEffectsAll& F){ + + // check_disable_later + checkLegitGenotype(ge, F); + + std::vector s; + if( (ge.orderEff.size() + ge.epistRtEff.size() + + ge.rest.size() + ge.flGenes.size()) == 0) { + Rcpp::warning("WARNING: you have evaluated fitness of a genotype of length zero."); + // s.push_back(1.0); //Eh??!! 1? or 0? FIXME It should be empty! and have prodFitness + // deal with it. + return s; + } + + // If we are dealing with a fitness landscape, that is as far as we go here + // at least for now. No other genes affect fitness. + // But this can be easily fixed in the future; do not return + // s below, but keep adding, maybe the noIntGenes. + // Recall also prodFitness uses, well, the prod of 1 + s + // so we want an s s.t. 1 + s = birth rate passed, + // which is the value in the fitness landscape as interpreted now. + // i.e., s = birth rate - 1; + if(F.fitnessLandscape.NumID.size()) { + std::string gs = concatIntsString(ge.flGenes); + if(F.fitnessLandscape.flmap.find(gs) == F.fitnessLandscape.flmap.end()) { + s.push_back(-1.0); + } else { + s.push_back(F.fitnessLandscape.flmap.at(gs) - 1); + } + return s; + } + + + // Genes without any restriction or epistasis are just genes. No modules. + // So simple we do it here. + if(F.genesNoInt.shift > 0) { + int shift = F.genesNoInt.shift; + for(auto const & r : ge.rest ) { + s.push_back(F.genesNoInt.s[r - shift]); + } + } + + // For the rest, there might be modules. Three different effects on + // fitness possible: as encoded in Poset, general epistasis, order effects. + + // Epistatis and poset are checked against all mutations. Create single + // sorted vector with all mutations and map to modules, if needed. Then + // eval. + + // Why not use a modified genotypeSingleVector without the no ints? We + // could, but not necessary. And you can place genes in any order you + // want, since this is not for order restrictions. That goes below. + // Why do I put the epist first? See previous answer. + // Why do I sort if one to one? binary searches. Not done below for order. + std::vector mutG (ge.epistRtEff); + // A gene can be involved in epistasis and order. This gene would only + // be in the orderEff vector, as seen in "createNewGenotype" or + // "convertGenotypeFromInts" + mutG.insert( mutG.end(), ge.orderEff.begin(), ge.orderEff.end()); + std::vector mutatedModules; + if(F.gMOneToOne) { + sort(mutG.begin(), mutG.end()); + mutatedModules = mutG; + } else { + mutatedModules = GeneToModule(mutG, F.Gene_Module_tabl, true, true); + } + std::vector srt = + evalPosetConstraints(mutatedModules, F.Poset, F.allPosetG); + std::vector se = + evalEpistasis(mutatedModules, F.Epistasis); + + // For order effects we need a new vector of mutatedModules: + if(F.gMOneToOne) { + mutatedModules = ge.orderEff; + } else { + mutatedModules = GeneToModule(ge.orderEff, F.Gene_Module_tabl, false, true); + } + + std::vector so = + evalOrderEffects(mutatedModules, F.orderE); + + // I keep s, srt, se, so separate for now for debugging. + s.insert(s.end(), srt.begin(), srt.end()); + s.insert(s.end(), se.begin(), se.end()); + s.insert(s.end(), so.begin(), so.end()); + + return s; +} + + + + + + +vector getGenotypeDrivers(const Genotype& ge, const vector& drv) { + // Returns the actual mutated drivers in a genotype. + // drv comes from R, and it is the vector with the + // numbers of the genes, not modules. + vector presentDrv; + vector og = allGenesinGenotype(ge); + set_intersection(og.begin(), og.end(), + drv.begin(), drv.end(), + back_inserter(presentDrv)); + return presentDrv; +} + + +double evalMutator(const Genotype& fullge, + const std::vector& full2mutator, + const fitnessEffectsAll& muEF, + bool verbose = false) { + // In contrast to nr_fitness, that sets birth and death, this simply + // returns the multiplication factor for the mutation rate. This is used + // by mutationFromParent and mutationFromScratch + + // Remember that the fitnessEffectsAll struct for mutator does not use + // the same mapping from gene names to gene numerical IDs as for + // fitness. fitnessEffectsAll and its associated algorithms expects the + // present genes to be indexed as successive integers. That is not + // necessarily the case if only some of the genes are in the mutator + // fitnessEffectsAll. So we need to remap the gene numerical IDs. We + // could try remapping by gene inside the struct, but painful and + // error-prone. Much simpler at least for now to do: + + // full genotype -> vector of ints (preserving order) + // -> convert the ints to the ints for the genotype of mutator + // -> genotype in terms of mutator + + // the "genotype in terms of mutator" is never preserved. It is just a + // transient mapping. + + // This will NOT work if we ever have order effects for mutator as we do + // not record order for those that matter for mutator if they do not matter for + // fitness. + + vector g1 = genotypeSingleVector(fullge); + vector g2; + int tmp; + for (auto const & i : g1) { + tmp = full2mutator[i - 1]; //gives a -9 if no correspondence + if( tmp > 0 ) g2.push_back(tmp); + } + if(g2.size() == 0) { + return 1.0; + } else { + Genotype newg = convertGenotypeFromInts(g2, muEF); + vector s = evalGenotypeFitness(newg, muEF); + + // just for checking + if(verbose) { + std::string sprod = "mutator product"; + Rcpp::Rcout << "\n Individual " << sprod << " terms are :"; + for(auto const &i : s) Rcpp::Rcout << " " << i; + Rcpp::Rcout << std::endl; + } + return prodMuts(s); + } +} + + +// [[Rcpp::export]] +double evalRGenotype(Rcpp::IntegerVector rG, Rcpp::List rFE, + bool verbose, bool prodNeg, + Rcpp::CharacterVector calledBy_) { + // Can evaluate both ONLY fitness or ONLY mutator. Not both at the same + // time. Use evalRGenotypeAndMut for that. + const std::string calledBy = Rcpp::as(calledBy_); + + if(rG.size() == 0) { + // Why don't we evaluate it? + Rcpp::warning("WARNING: you have evaluated fitness/mutator status of a genotype of length zero."); + return 1; + } + + //const Rcpp::List rF(rFE); + fitnessEffectsAll F = convertFitnessEffects(rFE); + Genotype g = convertGenotypeFromR(rG, F); + vector s = evalGenotypeFitness(g, F); + if(verbose) { + std::string sprod; + if(calledBy == "evalGenotype") { + sprod = "s"; + } else { // if (calledBy == "evalGenotypeMut") { + sprod = "mutator product"; + } + Rcpp::Rcout << "\n Individual " << sprod << " terms are :"; + for(auto const &i : s) Rcpp::Rcout << " " << i; + Rcpp::Rcout << std::endl; + } + if(calledBy == "evalGenotype") { + if(!prodNeg) + return prodFitness(s); + else + return prodDeathFitness(s); + } else { //if (calledBy == "evalGenotypeMut") { + return prodMuts(s); + } +} + + +// [[Rcpp::export]] +Rcpp::NumericVector evalRGenotypeAndMut(Rcpp::IntegerVector rG, + Rcpp::List rFE, + Rcpp::List muEF, + Rcpp::IntegerVector full2mutator_, + bool verbose, bool prodNeg) { + // Basically to test evalMutator. We repeat the conversion to genotype, + // but that is unavoidable here. + + + NumericVector out(2); + + // For fitness. Except for "evalGenotypeFromR", all is done as in the + // rest of the internal code for evaluating a genotype. + fitnessEffectsAll F = convertFitnessEffects(rFE); + fitnessEffectsAll muef = convertFitnessEffects(muEF); + Genotype g = convertGenotypeFromR(rG, F); + vector s = evalGenotypeFitness(g, F); + if(!prodNeg) + out[0] = prodFitness(s); + else + out[0] = prodDeathFitness(s); + if(verbose) { + std::string sprod = "s"; + Rcpp::Rcout << "\n Individual " << sprod << " terms are :"; + for(auto const &i : s) Rcpp::Rcout << " " << i; + Rcpp::Rcout << std::endl; + } + // out[0] = evalRGenotype(rG, rFE, verbose, prodNeg, "evalGenotype"); + // Genotype fullge = convertGenotypeFromR(rG, F); + + const std::vector full2mutator = Rcpp::as >(full2mutator_); + out[1] = evalMutator(g, full2mutator, muef, verbose); + + return out; +} + + + +double mutationFromScratch(const std::vector& mu, + const spParamsP& spP, + const Genotype& g, + const fitnessEffectsAll& fe, + const int mutationPropGrowth, + const std::vector full2mutator, + const fitnessEffectsAll& muEF) { + double mumult; + if(full2mutator.size() > 0) { // so there are mutator effects + mumult = evalMutator(g, full2mutator, muEF); + } else mumult = 1.0; + + if(mu.size() == 1) { + if(mutationPropGrowth) + return(mumult * mu[0] * spP.numMutablePos * spP.birth); + else + return(mumult * mu[0] * spP.numMutablePos); + } else { + std::vector sortedG = allGenesinGenotype(g); + std::vector nonmutated; + set_difference(fe.allGenes.begin(), fe.allGenes.end(), + sortedG.begin(), sortedG.end(), + back_inserter(nonmutated)); + // std::vector mutatedG = genotypeSingleVector(g); + // Not worth it using an accumulator? + // std::vector gg = genotypeSingleVector(g); + // accumulate(gg.begin(), gg.end(), 0.0, + // [](double x, int y) {return( x + mu[y - 1])}); + double mutrate = 0.0; + for(auto const &nm : nonmutated) { + mutrate += mu[nm - 1]; + } + if(mutationPropGrowth) + mutrate *= spP.birth; + return(mumult * mutrate); + } +} + + +std::vector < std::vector > list_to_vector_of_int_vectors(Rcpp::List vlist) { + // As it says. We check each vector is sorted! + std::vector < std::vector > vv(vlist.size()); + for(int i = 0; i != vlist.size(); ++i) { + vv[i] = Rcpp::as >(vlist[i]); + if( ! is_sorted(vv[i].begin(), vv[i].end()) ) + throw std::logic_error("Fixation genotypes not sorted. Bug in R code."); + } + return vv; +} + +// // [[Rcpp::export]] +// void wrap_list_to_vector_of_int_vectors(Rcpp::List vlist) { +// std::vector < std::vector > vo(vlist.size()); +// vo = list_to_vector_of_int_vectors(vlist); +// for(int ii = 0; ii != vo.size(); ++ii) { +// Rcpp::Rcout << "\n"; +// Rcpp::Rcout << " list position " << ii + 1 << ": "; +// for(int jj = 0; jj != vo[ii].size(); ++jj ) { +// Rcpp::Rcout << vo[ii][jj] << " "; +// } +// } +// Rcpp::Rcout << "\n"; +// } + + + +// Wrong when/if there are mutator effects. For suppose there wre, and +// they affected the parent, but no new mutator gene affects the child. +// We will, however, multiply twice by the mutator effect. Therefore, we +// disable this for now. We could fix this, checking if there are new +// mutation effects, or mutliplying/subtracting only new, etc. But too +// much of a mess. +// double mutationFromParent(const std::vector& mu, +// const spParamsP& newP, +// const spParamsP& parentP, +// const std::vector& newMutations, +// // const std::vector& nonmutated, +// const int mutationPropGrowth, +// const Genotype& fullge, +// const std::vector full2mutator, +// const fitnessEffectsAll& muEF) { +// double mumult; +// if(full2mutator.size() > 0) { // so there are mutator effects +// mumult = evalMutator(fullge, full2mutator, muEF); +// } else mumult = 1.0; + +// if(mu.size() == 1) { +// if(mutationPropGrowth) +// return(mumult * mu[0] * newP.numMutablePos * newP.birth); +// else +// return(mumult * mu[0] * newP.numMutablePos); +// } else { +// double mutrate = parentP.mutation; +// for(auto const mutated : newMutations) { +// mutrate -= mu[mutated - 1]; +// } +// if(mutationPropGrowth) +// mutrate *= newP.birth; +// return(mumult * mutrate); +// } +// } + + + +// About order of genes and their names, etc + +// We first read the R gene module. The $geneModule. The function is +// R_GeneModuleToGeneModule + +// We also read the no interaction. They have their own number-name +// correspondence, within the noInt genes part. See the struct +// genesWithoutInt. But that already comes order from R with numbers +// starting after the last gene with interaction. See the R function +// allFitnessEffects. diff --git a/OncoSimulR/src-i386/new_restrict.h b/OncoSimulR/src-i386/new_restrict.h new file mode 100644 index 00000000..343caf75 --- /dev/null +++ b/OncoSimulR/src-i386/new_restrict.h @@ -0,0 +1,316 @@ +// Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + + + +#ifndef _NEW_RESTRICT_H__ +#define _NEW_RESTRICT_H__ + +#include "debug_common.h" +#include "common_classes.h" +// #include "randutils.h" //Nope, until we have gcc-4.8 in Win; full C++11 +#include +#include +#include + +// Yes, even if covr suggests epistasis, Poset_struct and +// Gene_Module_struct are not used they are used a lot. +// There are many vectors of these structs. This is just a problem +// of coverage testing of structs. Google for it. + +enum class Dependency {monotone, semimonotone, xmpn, single, NA}; +// enum class TypeModel {exp, bozic1, mcfarlandlog, mcfarland, +// beerenwinkel, mcfarland0, bozic2}; +// enum class TypeModel {exp, bozic1, mcfarlandlog}; + +struct genesWithoutInt { + int shift; // access the s as s[index of mutation or index of mutated + // gene in genome - shift]. shift is the min. of NumID, given + // how that is numbered from R. We assume mutations always + // indexed 1 to something. Not 0 to something. + // If shift is -9, no elements The next first two are not really + // needed. Will remove later. Nope! we use them to provide nice output. + std::vector NumID; + std::vector names; + std::vector s; +}; + + +struct fitnessLandscape_struct { + std::vector NumID; + std::vector names; + // zz: maybe not a char; hold on + std::map flmap; +}; + +struct Poset_struct { + Dependency typeDep; + int childNumID; //Not redundant + double s; + double sh; + std::vector parentsNumID; + // The next two are clearly redundant but a triple check + std::string child; + std::vector parents; +}; + +// We use same structure for epistasis and order effects. With order +// effects, NumID is NOT sorted, but reflects the order of the +// restriction. And checking is done using that fact. + +struct epistasis { + double s; + std::vector NumID; //a set instead? nope.using includes with epistasis + std::vector names; // will remove later +}; + + + +struct Gene_Module_struct { + std::string GeneName; + std::string ModuleName; + int GeneNumID; + int ModuleNumID; +}; + +struct fitnessEffectsAll { + bool gMOneToOne; + int genomeSize; + // We use allOrderG or allEpistRTG to place new mutations in their + // correct place (orderEff or epistRtEff). Only one is needed. Use the + // one that is presumably always shorter which is allOrderG. And this is + // sorted. + std::vector allOrderG; // Modules or genes if one-to-one. + // std::vector allEpistRTG; + + // This makes it faster to run evalPosetConstraints + std::vector allPosetG; //Modules or genes if one-to-one. Only + //poset. Not epist. + std::vector Poset; + std::vector Epistasis; + std::vector orderE; + // std::vector Gene_Module_tabl; + std::vector Gene_Module_tabl; + std::vector allGenes; //used whenever a mutation created. Genes, + //not modules. Sorted. + std::vector drv; // Sorted. + genesWithoutInt genesNoInt; + // zz: + fitnessLandscape_struct fitnessLandscape; +}; + +inline fitnessEffectsAll nullFitnessEffects() { + // Make it explicit + fitnessEffectsAll f; + f.gMOneToOne = true; + f.genomeSize = 0; + f.allOrderG.resize(0); + f.allPosetG.resize(0); + f.Poset.resize(0); + f.Epistasis.resize(0); + f.orderE.resize(0); + f.Gene_Module_tabl.resize(0); + f.allGenes.resize(0); + f.drv.resize(0); + f.genesNoInt.shift = -99L; + f.genesNoInt.NumID.resize(0); + f.genesNoInt.names.resize(0); + f.genesNoInt.s.resize(0); + f.fitnessLandscape.NumID.resize(0); + f.fitnessLandscape.names.resize(0); + f.fitnessLandscape.flmap.clear(); + return f; +} + + +// FIXME: fitness_as_genes and Genotype are identical +// structures. Why not use the same thing? +// Because even if just four vectors of ints, have different meaning. +// Humm... +struct fitness_as_genes { + // fitnessEffectsAll in terms of genes. Useful for output + // conversions. There could be genes that are both in orderG and + // posetEpistG. In such a case, only in orderG. + // We only use a small part for now. + // All are ordered vectors. + std::vector orderG; + std::vector posetEpistG; + std::vector noInt; + std::vector flGenes; +}; + +inline fitness_as_genes zero_fitness_as_genes() { + fitness_as_genes g; + g.orderG.resize(0); + g.posetEpistG.resize(0); + g.noInt.resize(0); + g.flGenes.resize(0); + return g; +} +// There are no shared genes in order and epist. Any gene in orderEff can +// also be in the posets or general epistasis, but orderEff is only for +// those that have order effects. + +// For all genes for which there are no order effects, any permutation of +// the same mutations is the same genotype, and has the same fitness. That +// is why we separate orderEff, which is strictly in the order in which +// mutations accumulate, and thus usorted, from the other effects, that +// are always kept sorted. + +// rest are those genes that have no interactions. Evaluating their +// fitness is simple, and there can be no modules here. +struct Genotype { + std::vector orderEff; + std::vector epistRtEff; //always sorted + std::vector rest; // always sorted + std::vector flGenes; // always sorted; the fitness landscape genes +}; + + + + + +inline Genotype wtGenotype() { + // Not needed but to make it explicit + Genotype g; + g.orderEff.resize(0); + g.epistRtEff.resize(0); + g.rest.resize(0); + g.flGenes.resize(0); + return g; +} + +// struct st_PhylogNum { +// double time; +// std::vector parent; +// std::vector child; +// }; + +// This is all we need to then use igraph on the data frame. +struct PhylogName { + std::vector time; + std::vector parent; + std::vector child; + std::vector pop_size_child; + // yes, implicit constructor clears +}; + + +// This is all we need to then use igraph on the data frame. +// simplified for the LOD that is always stored +struct LOD { + // std::vector time; + std::vector parent; + std::vector child; +}; + +// We only need the string, but if we store the genotype as such +// we can avoid a costly conversion that often leads to storing nothing +// in +struct POM { + // std::vector time; + std::vector genotypesString; + std::vector genotypes; +}; + + + + +std::vector genotypeSingleVector(const Genotype& ge); + +bool operator==(const Genotype& lhs, const Genotype& rhs); + +// bool operator<(const Genotype& lhs, const Genotype& rhs); + + +TypeModel stringToModel(const std::string& dep); + +Dependency stringToDep(const std::string& dep); + +// std::string depToString(const Dependency dep); + +void obtainMutations(const Genotype& parent, + const fitnessEffectsAll& fe, + int& numMutablePosParent, + std::vector& newMutations, + //randutils::mt19937_rng& ran_gen + std::mt19937& ran_gen, + std::vector mu); + +Genotype createNewGenotype(const Genotype& parent, + const std::vector& mutations, + const fitnessEffectsAll& fe, + std::mt19937& ran_gen, + //randutils::mt19937_rng& ran_gen + bool random); + +std::vector evalGenotypeFitness(const Genotype& ge, + const fitnessEffectsAll& F); + + +fitnessEffectsAll convertFitnessEffects(Rcpp::List rFE); +std::vector getGenotypeDrivers(const Genotype& ge, const std::vector& drv); +std::vector allGenesinGenotype(const Genotype& ge); +void print_Genotype(const Genotype& ge); + +fitness_as_genes fitnessAsGenes(const fitnessEffectsAll& fe); + +std::map mapGenesIntToNames(const fitnessEffectsAll& fe); + +std::vector getGenotypeDrivers(const Genotype& ge, const std::vector& drv); + +double prodFitness(const std::vector& s); + +double prodDeathFitness(const std::vector& s); + +double mutationFromScratch(const std::vector& mu, + const spParamsP& spP, + const Genotype& g, + const fitnessEffectsAll& fe, + const int mutationPropGrowth, + const std::vector full2mutator, + const fitnessEffectsAll& muEF); + +// double mutationFromParent(const std::vector& mu, +// const spParamsP& newP, +// const spParamsP& parentP, +// const std::vector& newMutations, +// // const std::vector& nonmutated, +// const int mutationPropGrowth, +// const Genotype& fullge, +// const std::vector full2mutator, +// const fitnessEffectsAll& muEF); + +double prodMuts(const std::vector& s); + + +double set_cPDetect(const double n2, const double p2, + const double PDBaseline); + +bool detectedSizeP(const double n, const double cPDetect, + const double PDBaseline, std::mt19937& ran_gen); + +std::vector < std::vector > list_to_vector_of_int_vectors(Rcpp::List vlist); + +void addToPOM(POM& pom, + const Genotype& genotype, + const std::map& intName, + const fitness_as_genes& fg); + +void addToPOM(POM& pom, + const std::string string); + +#endif + diff --git a/OncoSimulR/src-i386/new_restrict_former_print_utils.cpp b/OncoSimulR/src-i386/new_restrict_former_print_utils.cpp new file mode 100644 index 00000000..6021ecb1 --- /dev/null +++ b/OncoSimulR/src-i386/new_restrict_former_print_utils.cpp @@ -0,0 +1,174 @@ +// Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + + +// This code is not being used in production. It was used during +// development, to see what C++ was thinking about what R was passing to +// it. It is left here, but is now all commented out. + + + +// // [[Rcpp::export]] +// void readFitnessEffects(Rcpp::List rFE, +// bool echo) { +// // fitnessEffectsAll fitnessEffects; +// // convertFitnessEffects(rFE, fitnessEffects); +// fitnessEffectsAll fitnessEffects = convertFitnessEffects(rFE); +// if(echo) { +// printFitnessEffects(fitnessEffects); +// } +// } + + + + + +// void printPoset(const std::vector& Poset) { + +// int counterInfs = 0; +// int counterNegInfs = 0; +// Rcpp::Rcout << "\n ********** Poset or Restriction table (internal) *******" +// << std::endl; +// if(!Poset.size()) { +// Rcpp::Rcout << "No posets: restriction table of size 0"<< std::endl; +// } else { +// Rcpp::Rcout << "Size = " << (Poset.size() - 1) << std::endl; +// for(size_t i = 1; i != Poset.size(); ++i) { +// // We do not show the Poset[0] +// Rcpp::Rcout <<"\t Dependent Module or gene (child) " << i +// << ". childNumID: " << Poset[i].childNumID +// << ". child full name: " << Poset[i].child +// << std::endl; +// Rcpp::Rcout <<"\t\t typeDep = " << depToString(Poset[i].typeDep) << ' ' ; +// Rcpp::Rcout <<"\t s = " << Poset[i].s << " "; +// Rcpp::Rcout <<"\t sh = " << Poset[i].sh << std::endl; +// if(std::isinf(Poset[i].sh)) +// ++counterInfs; +// if(std::isinf(Poset[i].sh) && (Poset[i].sh < 0)) +// ++counterNegInfs; +// Rcpp::Rcout << "\t\t Number of parent modules or genes = " << +// Poset[i].parents.size() << std::endl; +// Rcpp::Rcout << "\t\t\t Parents IDs: "; +// for(auto const &c : Poset[i].parentsNumID) +// Rcpp::Rcout << c << "; "; +// Rcpp::Rcout << std::endl; +// Rcpp::Rcout << "\t\t\t Parents names: "; +// for(auto const &c : Poset[i].parents) +// Rcpp::Rcout << c << "; "; +// Rcpp::Rcout << std::endl; + +// // for(size_t j = 0; j != Poset[i].deps.size(); ++j) { +// // Rcpp::Rcout << "\t\t\t\t Module " << (j + 1) << ": " +// // << Poset[i].deps[j] << std::endl; +// } +// Rcpp::Rcout << std::endl; + +// if(counterInfs) { +// Rcpp::Rcout << "In sh there were " << counterNegInfs +// << " negative infinites and " +// << (counterInfs - counterNegInfs) +// << " positive infinites" << std::endl; +// } +// } +// } + + +// void printGene_Module_table(const +// std::vector& Gene_Module_tabl, +// const bool gMOneToOne) { +// // Rcpp::Rcout << +// // "\n\n******** geneModule table (internal) *******:\nGene name\t Gene NumID\t Module name\t Module NumID\n"; +// // for(auto it = Gene_Module_tabl.begin(); it != Gene_Module_tabl.end(); ++it) { +// // Rcpp::Rcout << '\t' << it->GeneName << '\t' << it->GeneNumID << '\t' +// // << it->ModuleName << '\t' << it->ModuleNumID << std::endl; +// // } + +// Rcpp::Rcout << +// "\n\n******** geneModule table (internal) *******:\n" << +// std::setw(14) << std::left << "Gene name" << std::setw(14) << "Gene NumID" << std::setw(14) +// << "Module name" << std::setw(14) << "Module NumID" << "\n"; +// for(auto it = Gene_Module_tabl.begin(); it != Gene_Module_tabl.end(); ++it) { +// Rcpp::Rcout << std::setw(14) << std::left << it->GeneName << std::setw(14) +// << it->GeneNumID << std::setw(14) << it->ModuleName +// << std::setw(14) << it->ModuleNumID << std::endl; +// } + + +// if(gMOneToOne) +// Rcpp::Rcout << "This is a dummy module table: each module is one gene." +// << std::endl; +// } + + + +// void printOtherEpistasis(const std::vector& Epistasis, +// const std::string effectName, +// const std::string sepstr) { +// Rcpp::Rcout << "\n ********** General " << effectName << "s (internal) *******" +// << std::endl; +// if(!Epistasis.size()) { +// Rcpp::Rcout << "No general " << effectName << std::endl; +// } else { +// Rcpp::Rcout << " Number of " << effectName <<"s = " << Epistasis.size(); +// for(size_t i = 0; i != Epistasis.size(); ++i) { +// Rcpp::Rcout << "\n\t " << effectName << " " << i + 1 << ": " << +// ". Modules or Genes (names) = " << Epistasis[i].names[0]; +// for(size_t j = 1; j != Epistasis[i].NumID.size(); ++j) { +// Rcpp::Rcout << sepstr << Epistasis[i].names[j] ; +// } +// Rcpp::Rcout << ".\t Modules or Genes (NumID) = " << Epistasis[i].NumID[0]; +// for(size_t j = 1; j != Epistasis[i].NumID.size(); ++j) { +// Rcpp::Rcout << sepstr << Epistasis[i].NumID[j] ; +// } +// Rcpp::Rcout << ".\t s = " << Epistasis[i].s; +// } +// } +// Rcpp::Rcout << std::endl; +// } + +// void printNoInteractionGenes(const genesWithoutInt& genesNoInt) { +// Rcpp::Rcout << "\n ********** All remaining genes without interactions (internal) *******" +// << std::endl; + +// if(genesNoInt.shift <= 0) { +// Rcpp::Rcout << "No other genes without interactions" << std::endl; +// } else { +// Rcpp::Rcout << std::setw(14) << std::left << "Gene name" << std::setw(14) +// << "Gene NumID" << std::setw(14) << "s" << std::endl; +// for(size_t i = 0; i != genesNoInt.NumID.size(); ++i) { +// Rcpp::Rcout << std::setw(14) << std::left << genesNoInt.names[i] +// << std::setw(14) << genesNoInt.NumID[i] +// << std::setw(14) << genesNoInt.s[i] << '\n'; +// } +// } +// } + +// void printAllOrderG(const std::vector ge) { +// Rcpp::Rcout << "\n ********** NumID of genes/modules in the order restrict. (internal) *******" +// << std::endl; +// for(auto const &g : ge) +// Rcpp::Rcout << g << " "; +// Rcpp::Rcout << std::endl; +// } + + +// void printFitnessEffects(const fitnessEffectsAll& fe) { +// printGene_Module_table(fe.Gene_Module_tabl, fe.gMOneToOne); +// printPoset(fe.Poset); +// printOtherEpistasis(fe.orderE, "order effect", " > "); +// printOtherEpistasis(fe.Epistasis, "epistatic interaction", ", "); +// printNoInteractionGenes(fe.genesNoInt); +// printAllOrderG(fe.allOrderG); +// } diff --git a/OncoSimulR/tests/testthat/test.plotMuller.R b/OncoSimulR/tests/testthat/test.plotMuller.R new file mode 100755 index 00000000..53d59d2e --- /dev/null +++ b/OncoSimulR/tests/testthat/test.plotMuller.R @@ -0,0 +1,76 @@ +inittime <- Sys.time() +cat(paste("\n Starting plotMuller at", date())) + +test_that("oncosimul v.2 objects and genotype plotting", { + data(examplesFitnessEffects) + p1 <- oncoSimulIndiv(examplesFitnessEffects[["o3"]], + model = "McFL", + mu = 5e-5, + detectionSize = 1e8, + detectionDrivers = 3, + sampleEvery = 0.025, + max.num.tries = 10, + keepEvery = 5, + initSize = 2000, + finalTime = 3000, + onlyCancer = FALSE, + keepPhylog = TRUE) + class(p1) + plot(p1, type = "muller") +}) + +test_that("only recognized arguments", { + data(examplesFitnessEffects) + simulWithoutPhyLog<- oncoSimulIndiv(examplesFitnessEffects[["o3"]], + model = "McFL", + mu = 5e-5, + detectionSize = 1e8, + detectionDrivers = 3, + sampleEvery = 0.025, + max.num.tries = 10, + keepEvery = 5, + initSize = 2000, + finalTime = 3000, + onlyCancer = FALSE, + keepPhylog = FALSE) + + expect_error(plot(simulWithoutPhyLog, type = "muller"), + "Object simulation must has property: other$PhylogDF", fixed = TRUE) +}) + +test_that("OncoSimul class", { + data(examplePosets) + p705 <- examplePosets[["p705"]] + simulClassOncosimul1 <- oncoSimulIndiv(p705, model = "McFL", + mu = 5e-6, + sampleEvery = 0.02, + keepEvery = 10, + initSize = 2000, + finalTime = 3000, + max.num.tries = 100, + onlyCancer = FALSE) + + expect_error(plot(simulClassOncosimul1, type = "muller"), + "Type of object class must be: oncosimul2", fixed = TRUE) +}) + +test_that("only recognized arguments muller type", { + data(examplesFitnessEffects) + simulWithoutPhyLog<- oncoSimulIndiv(examplesFitnessEffects[["o3"]], + model = "McFL", + mu = 5e-5, + detectionSize = 1e8, + detectionDrivers = 3, + sampleEvery = 0.025, + max.num.tries = 10, + keepEvery = 5, + initSize = 2000, + finalTime = 3000, + onlyCancer = FALSE, + keepPhylog = TRUE) + + expect_error(plot(simulWithoutPhyLog, type = "muller", muller.type="invent"), + "Type of muller.plot unknown: it must be one offrequency or population", fixed = TRUE) +}) + + diff --git a/OncoSimulR/vignettes/OncoSimulR.Rmd b/OncoSimulR/vignettes/OncoSimulR.Rmd old mode 100644 new mode 100755 index 51632f57..75032377 --- a/OncoSimulR/vignettes/OncoSimulR.Rmd +++ b/OncoSimulR/vignettes/OncoSimulR.Rmd @@ -13,7 +13,7 @@ author: " date: "`r paste0(Sys.Date(),'. OncoSimulR version ', packageVersion('OncoSimulR'), suppressWarnings(ifelse(length(try(system('git rev-parse --short HEAD', ignore.stderr = TRUE, intern = TRUE))), paste0('. Revision: ', system('git rev-parse --short HEAD', intern = TRUE)), '')))`" header-includes: - \input{preamble.tex} -output: +output: bookdown::html_document2: css: custom4.css toc: yes @@ -112,7 +112,7 @@ vignette: > - +render("OncoSimulR.Rmd", output_format = BiocStyle::pdf_document2(toc = TRUE, toc_depth = 4, keep_tex = TRUE)) @@ -7142,7 +7142,9 @@ What types of plots are available? - stacked plots; -- stream plots. +- stream plots; + +- Muller plots. All those three are shown in both of Figure \@ref(fig:baux1) and Figure \@ref(fig:baux2). @@ -7174,6 +7176,7 @@ set.seed(1) b1 <- oncoSimulIndiv(fbauer, mu = 5e-5, initSize = 1000, finalTime = NA, onlyCancer = TRUE, + keepPhylog = TRUE, detectionProb = "default") ``` @@ -7187,14 +7190,16 @@ par(mfrow = c(3, 1)) plot(b1, type = "line", addtot = TRUE) plot(b1, type = "stacked") plot(b1, type = "stream") + ``` ```{r baux2,fig.width=6.5, fig.height=10, fig.cap="Three genotypes' plots of a simulation of Bauer's model"} -par(mfrow = c(3, 1)) +par(mfrow = c(4, 1)) ## Next, genotypes plot(b1, show = "genotypes", type = "line") plot(b1, show = "genotypes", type = "stacked") plot(b1, show = "genotypes", type = "stream") +plot(b1, show = "genotypes", type = "muller") ``` @@ -7211,12 +7216,13 @@ plot(b1, show = "genotypes", type = "stream") -In this case, probably the stream plots are most helpful. Note, however, -that (in contrast to some figures in the literature showing models of -clonal expansion) the stream plot (or the stacked plot) does not try to -explicitly show parent-descendant relationships, which would hardly be -realistically possible in these plots (although the plots of phylogenies -in section \@ref(phylog) could be of help). +In this case, the stream plots and Muller plots are the most helpful. +Note, however, that (in contrast to some figures in the literature +showing models of clonal expansion) the stream plot (or the stacked +plot or Muller plot) does not try to explicitly show parent-descendant +relationships, which would hardly be realistically possible in these +plots (although the plots of phylogenies in section \@ref(phylog) could +be of help). ### McFarland model with 5000 passengers and 70 drivers {#mcf5070} @@ -7389,14 +7395,15 @@ set.seed(1234) evalAllGenotypes(examplesFitnessEffects$cbn1, order = FALSE, model = "Bozic")[1:10, ] sb <- oncoSimulIndiv(examplesFitnessEffects$cbn1, - model = "Bozic", - mu = 5e-6, + model = "Bozic", + mu = 5e-6, detectionProb = "default", - detectionSize = 1e8, - detectionDrivers = 4, - sampleEvery = 2, - initSize = 2000, - onlyCancer = TRUE) + detectionSize = 1e8, + detectionDrivers = 4, + sampleEvery = 2, + initSize = 2000, + onlyCancer = TRUE, + keepPhylog = TRUE) summary(sb) ``` @@ -7435,11 +7442,16 @@ plot(sb,show = "genotypes", type = "stacked", plotDiversity = TRUE) par(cex = 0.75, las = 1) plot(sb,show = "genotypes", type = "stream", plotDiversity = TRUE) ``` +```{r sbx7,fig.width=6.5, fig.height=3.3} +## Genotypes, Muller +par(cex = 0.75, las = 1) +plot(sb,show = "genotypes", type = "muller", plotDiversity = TRUE) +``` The above illustrates again that different types of plots can be useful to reveal different patterns in the data. For instance, here, because of the -huge relative frequency of one of the clones/genotypes, the stacked and -stream plots do not reveal the other clones/genotypes as we cannot use a +huge relative frequency of one of the clones/genotypes, the stacked, +stream and Muller plots do not reveal the other clones/genotypes as we cannot use a log-transformed y-axis, even if there are other clones/genotypes present. @@ -7569,6 +7581,7 @@ tmp <- oncoSimulIndiv(examplesFitnessEffects[["o3"]], initSize = 2000, finalTime = 20000, onlyCancer = FALSE, + keepPhylog = TRUE, extraTime = 1500) tmp ``` @@ -7648,8 +7661,12 @@ point of detection, here specified as three drivers. Instead of specifying number larger than the number of existing possible drivers, and the simulation will run until `finalTime` if `onlyCancer = FALSE`.) - - +```{r tmpmdx8,fig.width=6.5, fig.height=5.3} +par(las = 1, cex = 0.85) +plot(tmp, show = "genotypes", type = "muller") +``` +Using Muller plots it is also easy to appreciate the huge change between +the clones 1 and 6; and how percentage ot the clone 7 grows later. @@ -9259,6 +9276,7 @@ plot(p1, type = "stacked", ask = FALSE) ``` + ## Sampling from a set of simulated subjects {#sample-1} diff --git a/OncoSimulR/vignettes/OncoSimulR.bib b/OncoSimulR/vignettes/OncoSimulR.bib old mode 100644 new mode 100755 diff --git a/OncoSimulR/vignettes/custom4.css b/OncoSimulR/vignettes/custom4.css old mode 100644 new mode 100755 diff --git a/OncoSimulR/vignettes/gitinfo.sty b/OncoSimulR/vignettes/gitinfo.sty old mode 100644 new mode 100755 diff --git a/OncoSimulR/vignettes/gitsetinfo.sty b/OncoSimulR/vignettes/gitsetinfo.sty old mode 100644 new mode 100755 diff --git a/OncoSimulR/vignettes/preamble.tex b/OncoSimulR/vignettes/preamble.tex old mode 100644 new mode 100755 diff --git a/OncoSimulR/vignettes/relfunct.png b/OncoSimulR/vignettes/relfunct.png old mode 100644 new mode 100755 diff --git a/OncoSimulR/vignettes/relfunct.tex b/OncoSimulR/vignettes/relfunct.tex old mode 100644 new mode 100755