Skip to content

Commit

Permalink
Update v0.6.0 (#20)
Browse files Browse the repository at this point in the history
* Update v0.5.0

* Fix vignette typo.

* v0.6.0

* Review allelic_series_test.R

Christoph's suggestion

* Update test-counts.R
  • Loading branch information
zmccaw-insitro authored Jul 25, 2024
1 parent 48f18ac commit 65eaeb5
Show file tree
Hide file tree
Showing 20 changed files with 379 additions and 136 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: AllelicSeries
Title: Allelic Series Test
Version: 0.0.5.0
Version: 0.0.6.0
Authors@R:
c(person(given = "Zachary",
family = "McCaw",
Expand All @@ -18,6 +18,7 @@ Description: Implementation of gene-level rare variant association tests targeti
License: BSD_3_clause + file LICENSE
Encoding: UTF-8
Imports:
methods,
Rcpp,
RNOmni,
SKAT
Expand Down
5 changes: 4 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
# Generated by roxygen2: do not edit by hand

S3method(print,COAST)
export(ASBT)
export(ASKAT)
export(Aggregator)
export(COAST)
export(Comparator)
export(CountAlleles)
export(Counts)
export(DGP)
export(OLS)
exportClasses(COAST)
importFrom(Rcpp,sourceCpp)
importFrom(methods,show)
useDynLib(AllelicSeries, .registration = TRUE)
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@

## Version 0.6.0
* Replaced the `CountAlleles` function with a similar (but faster) `Counts` function that returns counts the total number of alleles, variants, and carriers by variant class.
* Updated the formatting of the results output by the main `COAST` function.

## Version 0.5.0

* Added option (`min_mac`) to filter the variant set to only include those variants having at least a minimum minor allele count (10 is recommended).
Expand Down
11 changes: 11 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' Count Variants and Carriers
#'
#' @param anno (snps x 1) annotation vector with values in c(0, 1, 2).
#' @param geno (n x snps) genotype matrix.
#' @param min_mac Minimum minor allele count for inclusion. Default: 0.
#' @return Data.frame of allele, variant, and carrier counts.
#' @export
Counts <- function(anno, geno, min_mac = 0L) {
.Call(`_AllelicSeries_Counts`, anno, geno, min_mac)
}

#' Ordinary Least Squares
#'
#' Fits the standard OLS model.
Expand Down
53 changes: 31 additions & 22 deletions R/allelic_series_test.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Purpose: Allelic series test.
# Updated: 2024-04-03
# Updated: 2024-07-24

# Default weights.
DEFAULT_WEIGHTS <- c(1, 2, 3)
Expand Down Expand Up @@ -346,7 +346,6 @@ ASKAT <- function(
#' to PTV variants only in the omnibus test? Default: FALSE.
#' @param is_pheno_binary Is the phenotype binary? Default: FALSE.
#' @param min_mac Minimum minor allele count for inclusion. Default: 0.
#' @param return_counts Include minor allele counts in output? Default: TRUE.
#' @param return_omni_only Return only the omnibus p-value? Default: FALSE.
#' @param score_test Use a score test for burden analysis? If FALSE, uses a
#' Wald test.
Expand Down Expand Up @@ -376,7 +375,6 @@ COAST <- function(
include_orig_skato_ptv = FALSE,
is_pheno_binary = FALSE,
min_mac = 0,
return_counts = TRUE,
return_omni_only = FALSE,
score_test = FALSE,
weights = DEFAULT_WEIGHTS
Expand Down Expand Up @@ -442,12 +440,12 @@ COAST <- function(

# Collect p-values.
p_burden <- c(
p_count = p_count,
p_ind = p_ind,
p_max_count = p_max_count,
p_max_ind = p_max_ind,
p_sum_count = p_sum_count,
p_sum_ind = p_sum_ind
count = p_count,
ind = p_ind,
max_count = p_max_count,
max_ind = p_max_ind,
sum_count = p_sum_count,
sum_ind = p_sum_ind
)

# SKAT tests.
Expand All @@ -461,44 +459,55 @@ COAST <- function(
return_null_model = TRUE,
weights = weights
)
p_skat <- c(p_allelic_skat = skat_list$p)
p_skat <- c(allelic_skat = skat_list$p)
null <- skat_list$null

# Standard SKAT-O all.
if (include_orig_skato_all) {
orig_skato_all <- SKAT::SKAT(geno, null, method = "SKATO")
p_skat <- c(p_skat, p_orig_skat_all = orig_skato_all$p.value)
p_skat <- c(p_skat, orig_skat_all = orig_skato_all$p.value)
}

# Standard SKAT-O PTV.
if (include_orig_skato_ptv) {
ptv <- geno[, anno == 2, drop = FALSE]
if (ncol(ptv) > 0) {
orig_skato_ptv <- SKAT::SKAT(ptv, null, method = "SKATO")
p_skat <- c(p_skat, p_orig_skat_ptv = orig_skato_ptv$p.value)
p_skat <- c(p_skat, orig_skat_ptv = orig_skato_ptv$p.value)
}
}

# Omnibus.
n_burden <- length(p_burden)
n_skat <- length(p_skat)

p_val = c(p_burden, p_skat)
pvals = c(p_burden, p_skat)
omni_weights <- c(rep(1, n_burden), rep(n_burden / n_skat, n_skat))

p_omni <- RNOmni::OmniP(p = p_val, w = omni_weights)
p_omni <- RNOmni::OmniP(p = pvals, w = omni_weights)

# Output.
# Only omnibus p-value requested.
if (return_omni_only) {
out <- c(p_omni = p_omni)
} else {
out <- c(p_val, p_omni = p_omni)
}

if (return_counts) {
counts <- CountAlleles(anno = anno, geno = geno, min_mac = min_mac)
out <- c(counts, out)
return(out)
}

# Format p-values.
pvals <- c(pvals, omni = p_omni)
df_pvals <- data.frame(
test = names(pvals),
type = c(rep("burden", n_burden), rep("skat", n_skat), "omni"),
pval = as.numeric(pvals)
)

# Variant counts.
counts <- Counts(anno = anno, geno = geno, min_mac = min_mac)

# Output.
out <- methods::new(
Class = "COAST",
Counts = counts,
Pvals = df_pvals
)
return(out)
}
56 changes: 56 additions & 0 deletions R/class.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Purpose: Output class.
# Updated: 2024-07-24

#' Allelic Series Output Class
#'
#' @slot Counts Allele, variant, and carrier counts.
#' @slot Pvals Result p-values.
#' @name COAST-class
#' @rdname COAST-class
#' @exportClass COAST
setClass(
Class = "COAST",
representation = representation(
Counts = "data.frame",
Pvals = "data.frame"
)
)


#' Print Method for COAST Object.
#'
#' Print method for objects of class \code{COAST}.
#'
#' @param x An object of class \code{COAST}.
#' @param ... Unused.
#' @export
print.COAST <- function(x, ...) {

# Counts.
cat("Counts:\n")
show(x@Counts)
cat("\n\n")

# P-values.
cat("P-values:\n")
pvals <- x@Pvals
pvals$pval <- formatC(pvals$pval, format = "e", digits = 2)
show(pvals)
cat("\n\n")
}


#' Show Method for COAST Object
#'
#' @param object An object of class \code{COAST}.
#' @rdname COAST-method
#' @importFrom methods show
setMethod(
f = "show",
signature = c(object = "COAST"),
definition = function(object) {
print.COAST(x = object)
}
)


50 changes: 0 additions & 50 deletions R/utilities.R
Original file line number Diff line number Diff line change
@@ -1,56 +1,6 @@
# Purpose: Utility functions.
# Updated: 2024-04-03

#' Count Alleles
#'
#' Count the number of non-zero alleles bearing each variant annotation.
#'
#' @param anno (snps x 1) annotation vector with values in c(0, 1, 2).
#' @param geno (n x snps) genotype matrix.
#' @param count_carriers If true, counts the number of carriers rather than
#' the number of alleles.
#' @param min_mac Minimum minor allele count for inclusion. Default: 0.
#' @return 3 x 1 numeric vector with the counts of BMVs, DMVs, and PTVs.
#' @export
CountAlleles <- function(
anno,
geno,
count_carriers = FALSE,
min_mac = 0
) {

# Minor allele count filtering.
if (min_mac > 0) {
mac <- apply(geno, 2, sum)
keep <- (mac >= min_mac)

anno <- anno[keep]
geno <- geno[, keep, drop = FALSE]
}

# Category counts.
if (count_carriers) {

# Count carriers.
n_bmv <- sum(apply(geno[, anno == 0, drop = FALSE], 1, sum) > 0)
n_dmv <- sum(apply(geno[, anno == 1, drop = FALSE], 1, sum) > 0)
n_ptv <- sum(apply(geno[, anno == 2, drop = FALSE], 1, sum) > 0)

} else {

# Count alleles.
n_bmv <- sum(geno[, anno == 0])
n_dmv <- sum(geno[, anno == 1])
n_ptv <- sum(geno[, anno == 2])

}

# Output.
out <- c(n_bmv = n_bmv, n_dmv = n_dmv, n_ptv = n_ptv)
return(out)
}


#' Linear Association Test
#'
#' @param y (n x 1) continuous phenotype vector
Expand Down
91 changes: 73 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
README
================
2024-07-24

# Allelic Series

This package implements gene-level rare variant association tests
Expand Down Expand Up @@ -93,19 +97,57 @@ genotype matrix, and the phenotype vector.
show(results)
```

## n_bmv n_dmv n_ptv p_count p_ind
## 2.870000e+02 1.620000e+02 6.100000e+01 3.112702e-26 1.322084e-09
## p_max_count p_max_ind p_sum_count p_sum_ind p_allelic_skat
## 3.076876e-10 5.374363e-09 1.661854e-20 2.554417e-11 2.658137e-07
## p_omni
## 3.735235e-25
## Counts:
## anno alleles variants carriers
## 1 0 287 163 96
## 2 1 162 109 82
## 3 2 61 28 45
##
##
## P-values:
## test type pval
## 1 count burden 3.11e-26
## 2 ind burden 1.32e-09
## 3 max_count burden 3.08e-10
## 4 max_ind burden 5.37e-09
## 5 sum_count burden 1.66e-20
## 6 sum_ind burden 2.55e-11
## 7 allelic_skat skat 2.66e-07
## 8 omni omni 3.74e-25

By default, the output of `COAST` includes a data.frame of counts
showing the number of alleles, variants, and carriers in each class that
contributed to the test, and a data.frame of p-values, with the `omni`
test denoting the final, overall p-value. The counts data.frame is
accessed via:

``` r
results@Counts
```

## anno alleles variants carriers
## 1 0 287 163 96
## 2 1 162 109 82
## 3 2 61 28 45

and the p-values data.frame via:

``` r
results@Pvals
```

## test type pval
## 1 count burden 3.112702e-26
## 2 ind burden 1.322084e-09
## 3 max_count burden 3.076876e-10
## 4 max_ind burden 5.374363e-09
## 5 sum_count burden 1.661854e-20
## 6 sum_ind burden 2.554417e-11
## 7 allelic_skat skat 2.658137e-07
## 8 omni omni 3.735235e-25

By default, the output of `COAST` includes counts for the number of
alleles of each variant class that contributed to the test, and a vector
of p-values, corresponding to the different components of the allelic
series test. The final, overall p-value is given by `p_omni`. To return
the omnibus p-value only, specify `return_omni_only = TRUE` when calling
`COAST`.
To return the omnibus p-value only, specify `return_omni_only = TRUE`
when calling `COAST`.

## Robust omnibus test

Expand Down Expand Up @@ -133,12 +175,25 @@ results <- COAST(
show(results)
```

## n_bmv n_dmv n_ptv p_count p_ind
## 2.870000e+02 1.620000e+02 6.100000e+01 3.112702e-26 1.322084e-09
## p_max_count p_max_ind p_sum_count p_sum_ind p_allelic_skat
## 3.076876e-10 5.374363e-09 1.661854e-20 2.554417e-11 2.658137e-07
## p_orig_skat_all p_orig_skat_ptv p_omni
## 1.548324e-05 6.632119e-08 3.735235e-25
## Counts:
## anno alleles variants carriers
## 1 0 287 163 96
## 2 1 162 109 82
## 3 2 61 28 45
##
##
## P-values:
## test type pval
## 1 count burden 3.11e-26
## 2 ind burden 1.32e-09
## 3 max_count burden 3.08e-10
## 4 max_ind burden 5.37e-09
## 5 sum_count burden 1.66e-20
## 6 sum_ind burden 2.55e-11
## 7 allelic_skat skat 2.66e-07
## 8 orig_skat_all skat 1.55e-05
## 9 orig_skat_ptv skat 6.63e-08
## 10 omni omni 3.74e-25

## Loading genotypes

Expand Down
Loading

0 comments on commit 65eaeb5

Please sign in to comment.