Skip to content

Commit

Permalink
Merge pull request #92 from gibsramen/nb-lme-single
Browse files Browse the repository at this point in the history
Add NB LME Single as default model
  • Loading branch information
gibsramen authored Oct 7, 2023
2 parents 4302281 + a1df859 commit 58f763d
Show file tree
Hide file tree
Showing 6 changed files with 194 additions and 11 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ birdman/templates/negative_binomial
birdman/templates/multinomial
birdman/templates/negative_binomial_single
birdman/templates/negative_binomial_lme
birdman/templates/negative_binomial_lme_single
tests/custom_model

*__pycache__/
Expand Down
4 changes: 2 additions & 2 deletions birdman/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from .model_base import (BaseModel, TableModel, SingleFeatureModel,
ModelIterator)
from .default_models import (NegativeBinomial, NegativeBinomialLME,
NegativeBinomialSingle)
NegativeBinomialSingle, NegativeBinomialLMESingle)

__version__ = "0.1.0"

__all__ = ["BaseModel", "TableModel", "SingleFeatureModel", "ModelIterator",
"NegativeBinomial", "NegativeBinomialSingle",
"NegativeBinomialLME"]
"NegativeBinomialLME", "NegativeBinomialLMESingle"]
124 changes: 119 additions & 5 deletions birdman/default_models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
from os.path import join as pjoin
from pkg_resources import resource_filename

import biom
Expand All @@ -10,9 +10,10 @@
TEMPLATES = resource_filename("birdman", "templates")
DEFAULT_MODEL_DICT = {
"negative_binomial": {
"standard": os.path.join(TEMPLATES, "negative_binomial.stan"),
"single": os.path.join(TEMPLATES, "negative_binomial_single.stan"),
"lme": os.path.join(TEMPLATES, "negative_binomial_lme.stan")
"standard": pjoin(TEMPLATES, "negative_binomial.stan"),
"single": pjoin(TEMPLATES, "negative_binomial_single.stan"),
"full_lme": pjoin(TEMPLATES, "negative_binomial_lme.stan"),
"single_lme": pjoin(TEMPLATES, "negative_binomial_lme_single.stan")
}
}

Expand Down Expand Up @@ -271,7 +272,7 @@ def __init__(
inv_disp_sd: float = 0.5,
group_var_prior: float = 1.0
):
filepath = DEFAULT_MODEL_DICT["negative_binomial"]["lme"]
filepath = DEFAULT_MODEL_DICT["negative_binomial"]["full_lme"]
super().__init__(
table=table,
model_path=filepath,
Expand Down Expand Up @@ -318,3 +319,116 @@ def __init__(
posterior_predictive="y_predict",
log_likelihood="log_lhood"
)


class NegativeBinomialLMESingle(SingleFeatureModel):
"""Fit count data using negative binomial model on single feature.
.. math::
y_{ij} &\\sim \\textrm{NB}(\\mu_{ij}, \\phi_j)
\\log(\\mu_{ij}) &= \\log(\\textrm{Depth}_i) + x_i \\beta + z_i u
Priors:
.. math::
\\beta_j \\sim \\begin{cases}
\\textrm{Normal}(A, B_p), & j = 0
\\textrm{Normal}(0, B_p), & j > 0
\\end{cases}
.. math:: A = \\ln{\\frac{1}{D}},\\ D = \\textrm{Number of features}
.. math::
\\frac{1}{\\phi_j} \\sim \\textrm{Lognormal}(0, s),\\ s \\in
\\mathbb{R}_{>0}
.. math::
u_j &\\sim \\textrm{Normal}(0, u_p),\\ u_p \\in \\mathbb{R}_{>0}
:param table: Feature table (features x samples)
:type table: biom.table.Table
:param feature_id: ID of feature to fit
:type feature_id: str
:param formula: Design formula to use in model
:type formula: str
:param group_var: Variable in metadata to use as grouping
:type group_var: str
:param metadata: Metadata for design matrix
:type metadata: pd.DataFrame
:param beta_prior: Standard deviation for normally distributed prior values
of beta, defaults to 5.0
:type beta_prior: float
:param inv_disp_sd: Standard deviation for lognormally distributed prior
values of 1/phi, defaults to 0.5
:type inv_disp_sd: float
"""
def __init__(
self,
table: biom.table.Table,
feature_id: str,
formula: str,
group_var: str,
metadata: pd.DataFrame,
beta_prior: float = 5.0,
inv_disp_sd: float = 0.5,
group_var_prior: float = 1.0
):
filepath = DEFAULT_MODEL_DICT["negative_binomial"]["single_lme"]

super().__init__(
table=table,
feature_id=feature_id,
model_path=filepath,
)
self.create_regression(formula=formula, metadata=metadata)

D = table.shape[0]
A = np.log(1 / D)

# Encode group IDs starting at 1 because Stan 1-indexes arrays
group_var_series = metadata[group_var].loc[self.sample_names]
samp_subj_map = group_var_series.astype("category").cat.codes + 1
# Encoding as categories uses alphabetic sorting
self.groups = np.sort(group_var_series.unique())

param_dict = {
"depth": np.log(table.sum(axis="sample")),
"B_p": beta_prior,
"inv_disp_sd": inv_disp_sd,
"A": A,
"subj_ids": samp_subj_map,
"u_p": group_var_prior,
"S": len(self.groups)
}
self.add_parameters(param_dict)

self.specify_model(
params=["beta_var", "inv_disp", "subj_int"],
dims={
"beta_var": ["covariate"],
"log_lhood": ["tbl_sample"],
"y_predict": ["tbl_sample"],
"subj_int": ["group"]
},
coords={
"covariate": self.colnames,
"tbl_sample": self.sample_names,
"group": self.groups
},
include_observed_data=True,
posterior_predictive="y_predict",
log_likelihood="log_lhood"
)
6 changes: 3 additions & 3 deletions birdman/templates/negative_binomial_lme.stan
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ model {
// generating counts
for (n in 1:N){
for (i in 1:D){
target += neg_binomial_2_log_lpmf(y[n, i] | lam_clr[n, i], inv_disp[i]);
target += neg_binomial_2_log_lpmf(y[n, i] | lam_clr[n, i], inv(inv_disp[i]));
}
}
}
Expand All @@ -59,8 +59,8 @@ generated quantities {

for (n in 1:N){
for (i in 1:D){
y_predict[n, i] = neg_binomial_2_log_rng(lam_clr[n, i], inv_disp[i]);
log_lhood[n, i] = neg_binomial_2_log_lpmf(y[n, i] | lam_clr[n, i], inv_disp[i]);
y_predict[n, i] = neg_binomial_2_log_rng(lam_clr[n, i], inv(inv_disp[i]));
log_lhood[n, i] = neg_binomial_2_log_lpmf(y[n, i] | lam_clr[n, i], inv(inv_disp[i]));
}
}
}
51 changes: 51 additions & 0 deletions birdman/templates/negative_binomial_lme_single.stan
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
data {
int<lower=0> N; // number of samples
int<lower=0> S; // number of groups (subjects)
int<lower=0> p; // number of covariates
real A; // mean intercept
vector[N] depth; // log sequencing depths of microbes
matrix[N, p] x; // covariate matrix
array[N] int y; // observed microbe abundances
array[N] int<lower=1, upper=S> subj_ids; // mapping of samples to subject IDs

real<lower=0> B_p; // stdev for beta normal prior
real<lower=0> inv_disp_sd; // stdev for inv disp lognormal prior
real<lower=0> u_p; // stdev for subject intercept normal prior
}

parameters {
real<offset=A, multiplier=B_p> beta_0;
vector<multiplier=B_p>[p-1] beta_x;
real<lower=0> inv_disp;
vector[S] subj_int;
}

transformed parameters {
vector[p] beta_var = append_row(beta_0, beta_x);
vector[N] lam = x * beta_var + depth;

for (n in 1:N){
lam[n] += subj_int[subj_ids[n]];
}
}

model {
inv_disp ~ lognormal(0., inv_disp_sd);
beta_0 ~ normal(A, B_p);
beta_x ~ normal(0, B_p);
for (j in 1:S){
subj_int[j] ~ normal(0., u_p);
}

y ~ neg_binomial_2_log(lam, inv(inv_disp));
}

generated quantities {
vector[N] log_lhood;
vector[N] y_predict;

for (n in 1:N){
y_predict[n] = neg_binomial_2_log_rng(lam[n], inv(inv_disp));
log_lhood[n] = neg_binomial_2_log_lpmf(y[n] | lam[n], inv(inv_disp));
}
}
19 changes: 18 additions & 1 deletion tests/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
import numpy as np

from birdman import (NegativeBinomial, NegativeBinomialLME,
NegativeBinomialSingle, ModelIterator)
NegativeBinomialSingle, NegativeBinomialLMESingle,
ModelIterator)

TEMPLATES = resource_filename("birdman", "templates")

Expand Down Expand Up @@ -70,6 +71,22 @@ def test_single_feat(self, table_biom, metadata):
nb.compile_model()
nb.fit_model(method="mcmc", num_draws=100)

def test_lme_single_feat(self, table_biom, metadata):
md = metadata.copy()
np.random.seed(42)
md["group"] = np.random.randint(low=0, high=3, size=md.shape[0])
md["group"] = "G" + md["group"].astype(str)
for fid in table_biom.ids(axis="observation"):
nb = NegativeBinomialLMESingle(
table=table_biom,
feature_id=fid,
formula="host_common_name",
group_var="group",
metadata=md,
)
nb.compile_model()
nb.fit_model(num_draws=100)


class TestToInference:
def test_serial_to_inference(self, example_model):
Expand Down

0 comments on commit 58f763d

Please sign in to comment.