Skip to content

Commit

Permalink
Implemented first draft of script compile_lda_runs.R.
Browse files Browse the repository at this point in the history
  • Loading branch information
pcarbo committed Jul 16, 2024
1 parent 627b19f commit 38ba74a
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 8 deletions.
52 changes: 52 additions & 0 deletions scripts/compile_lda_runs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Compile the LDA runs for one data set into a single .RData file.
library(tools)
library(stringr)
library(tm)
library(topicmodels)

# Combine results from all files of the form lda-*.rds in this
# directory.
out.dir <- "../output/nips/rds"

# List all the RDS files containing the model fits. Stop right away
# with an informative message if none are found; otherwise the loop
# below would be asked to read a file that does not exist.
files <- Sys.glob(file.path(out.dir,"lda-*.rds"))
n <- length(files)
if (n == 0)
  stop("No files of the form lda-*.rds were found in ",out.dir,
       call. = FALSE)

# Set up two data structures: "fits", a list used to store all the
# results; and "dat", a data frame summarizing the model parameters
# and optimization settings used to produce these fits.
#
# The labels are the file names without the directory and without the
# ".rds" extension. basename() and file_path_sans_ext() are used
# instead of regular-expression removal so that characters such as "."
# in the path or file name are never interpreted as wildcards.
fits <- vector("list",n)
labels <- file_path_sans_ext(basename(files))
names(fits) <- labels
dat <- data.frame(label = labels,
                  k = 0,
                  method = "",
                  extrapolate = FALSE,
                  stringsAsFactors = FALSE)

# Load the results from the RDS files. The "lda" element of each file
# is stored in "fits", and its "k" slot is recorded in "dat". The
# method is taken to be the third "-"-separated token of the label,
# and "extrapolate" is TRUE whenever the label contains "ex".
#
# NOTE(review): the "ex" test is a substring match on the whole label,
# so a data set name containing "ex" would also be flagged -- confirm
# this cannot happen with the file names in use.
for (i in seq_len(n)) {
  out <- readRDS(files[i])
  fits[[i]] <- out$lda
  dat[i,"k"] <- out$lda@k
  dat[i,"method"] <- unlist(strsplit(labels[i],"-"))[3]
  dat[i,"extrapolate"] <- grepl("ex",labels[i],fixed = TRUE)
}

# Reorder the results in "fits" and "dat" by k, then by extrapolate,
# then by method. Any method other than "em" or "scd" becomes NA when
# converted to a factor with these two levels.
dat <- transform(dat,method = factor(method,c("em","scd")))
i <- with(dat,order(k,extrapolate,method))
dat <- dat[i,]
fits <- fits[i]
rownames(dat) <- NULL

# Convert the "k" column to a factor.
dat <- transform(dat,k = factor(k))

# Save the combined results to an .RData file in the working
# directory, then re-save it with resaveRdaFiles (from the tools
# package), which can choose a more space-efficient compression.
save(list = c("dat","fits"),
     file = "lda.RData")
resaveRdaFiles("lda.RData")
3 changes: 0 additions & 3 deletions scripts/run_lda.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,6 @@ k <- ncol(fit0$F)
# -------
# Now we are ready to perform variational inference for the LDA model.
#
# For the droplet data with k = 10, this step took roughly 6 min per
# iteration.
#
# For the 68k PBMC data with k = 10, this step took roughly 20 min
# per iteration.
#
Expand Down
8 changes: 4 additions & 4 deletions scripts/run_lda.sbatch
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#!/bin/bash

#SBATCH --partition=broadwl
#SBATCH --partition=mstephens
#SBATCH --account=pi-mstephens
#SBATCH --cpus-per-task=4
#SBATCH --mem=8G
#SBATCH --time=24:00:00
#SBATCH --mem=32G
#SBATCH --time=48:00:00

# This script allocates computing resources (CPUs, memory), loads R,
# and runs run_lda.R. See run_lda_all.sh for examples illustrating how
Expand All @@ -14,7 +14,7 @@
#
# (a) .libPaths()[1] should be "/home/pcarbo/R_libs"
#
# (b) Change "mem" to 32G for pbmc_68k.RData.
# (b) Change "mem" to 32G and "time" to 48 h for pbmc_68k.RData.
#

# Get the command-line arguments.
Expand Down
46 changes: 45 additions & 1 deletion scripts/run_lda_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -150,4 +150,48 @@ sbatch ${MAIN_SCRIPT} droplet droplet/rds/fit-droplet-scd-ex-k=11
sbatch ${MAIN_SCRIPT} droplet droplet/rds/fit-droplet-scd-ex-k=12

# Run LDA on the 68k pbmc data.
# TO DO.
# data initfile
# Submit one LDA job per (method, k) combination for the pbmc_68k
# data: each of the four initializations (em, em-ex, scd, scd-ex) is
# run for k = 2,...,12, in that order.
for method in em em-ex scd scd-ex; do
  for k in 2 3 4 5 6 7 8 9 10 11 12; do
    sbatch ${MAIN_SCRIPT} pbmc_68k "pbmc68k/rds/fit-pbmc68k-${method}-k=${k}"
  done
done

0 comments on commit 38ba74a

Please sign in to comment.