diff --git a/DESCRIPTION b/DESCRIPTION index 12ec076..8ba07ea 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: STAARpipeline Type: Package Title: STAARpipeline for Analyzing Whole-Genome/Whole-Exome Sequencing Data Version: 0.9.7.2 -Date: 2024-11-14 +Date: 2024-11-17 Author: Xihao Li [aut, cre], Zilin Li [aut, cre], Sheila M. Gaynor [aut], Han Chen [aut] Maintainer: Xihao Li , Zilin Li Description: An R package for performing STAARpipeline in analyzing whole-genome/whole-exome sequencing data. diff --git a/R/Gene_Centric_Coding.R b/R/Gene_Centric_Coding.R index 5272396..57df225 100644 --- a/R/Gene_Centric_Coding.R +++ b/R/Gene_Centric_Coding.R @@ -25,6 +25,8 @@ #' a given variant-set (default = 2). #' @param rv_num_cutoff_max the cutoff of maximum number of variants of analyzing #' a given variant-set (default = 1e+09). +#' @param rv_num_cutoff_max_prefilter the cutoff of maximum number of variants +#' before extracting the genotype matrix (default = 1e+09). #' @param QC_label channel name of the QC label in the GDS/aGDS file (default = "annotation/filter"). #' @param variant_type type of variant included in the analysis. Choices include "SNV", "Indel", or "variant" (default = "SNV"). #' @param geno_missing_imputation method of handling missing genotypes. Either "mean" or "minor" (default = "mean"). @@ -47,7 +49,7 @@ #' @export Gene_Centric_Coding <- function(chr,gene_name,category=c("all_categories","plof","plof_ds","missense","disruptive_missense","synonymous","ptv","ptv_ds","all_categories_incl_ptv"), - genofile,obj_nullmodel,rare_maf_cutoff=0.01,rv_num_cutoff=2,rv_num_cutoff_max=1e9, + genofile,obj_nullmodel,rare_maf_cutoff=0.01,rv_num_cutoff=2,rv_num_cutoff_max=1e9,rv_num_cutoff_max_prefilter=1e9, QC_label="annotation/filter",variant_type=c("SNV","Indel","variant"),geno_missing_imputation=c("mean","minor"), Annotation_dir="annotation/info/FunctionalAnnotation",Annotation_name_catalog, Use_annotation_weights=c(TRUE,FALSE),Annotation_name=NULL, @@ -63,7 +65,8 @@ Gene_Centric_Coding <- function(chr,gene_name,category=c("all_categories","plof" if(category=="all_categories") { results <- coding(chr,gene_name,genofile,obj_nullmodel,genes, - rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -73,7 +76,8 @@ Gene_Centric_Coding <- function(chr,gene_name,category=c("all_categories","plof" if(category=="plof") { results <- plof(chr,gene_name,genofile,obj_nullmodel,genes, - rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -83,7 +87,8 @@ Gene_Centric_Coding <- function(chr,gene_name,category=c("all_categories","plof" if(category=="plof_ds") { results <- plof_ds(chr,gene_name,genofile,obj_nullmodel,genes, - rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -93,7 +98,8 @@ Gene_Centric_Coding <- function(chr,gene_name,category=c("all_categories","plof" if(category=="missense") { results <- missense(chr,gene_name,genofile,obj_nullmodel,genes, - rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -103,7 +109,8 @@ Gene_Centric_Coding <- function(chr,gene_name,category=c("all_categories","plof" if(category=="disruptive_missense") { results <- disruptive_missense(chr,gene_name,genofile,obj_nullmodel,genes, - rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -113,7 +120,8 @@ Gene_Centric_Coding <- function(chr,gene_name,category=c("all_categories","plof" if(category=="synonymous") { results <- synonymous(chr,gene_name,genofile,obj_nullmodel,genes, - rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -124,7 +132,8 @@ Gene_Centric_Coding <- function(chr,gene_name,category=c("all_categories","plof" if(category=="ptv") { results <- ptv(chr,gene_name,genofile,obj_nullmodel,genes, - rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -135,7 +144,8 @@ Gene_Centric_Coding <- function(chr,gene_name,category=c("all_categories","plof" if(category=="ptv_ds") { results <- ptv_ds(chr,gene_name,genofile,obj_nullmodel,genes, - rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -146,7 +156,8 @@ Gene_Centric_Coding <- function(chr,gene_name,category=c("all_categories","plof" if(category=="all_categories_incl_ptv") { results <- coding_incl_ptv(chr,gene_name,genofile,obj_nullmodel,genes, - rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, diff --git a/R/Gene_Centric_Coding_cond.R b/R/Gene_Centric_Coding_cond.R index b3d7313..121836e 100644 --- a/R/Gene_Centric_Coding_cond.R +++ b/R/Gene_Centric_Coding_cond.R @@ -26,6 +26,8 @@ #' a given variant-set (default = 2). #' @param rv_num_cutoff_max the cutoff of maximum number of variants of analyzing #' a given variant-set (default = 1e+09). +#' @param rv_num_cutoff_max_prefilter the cutoff of maximum number of variants +#' before extracting the genotype matrix (default = 1e+09). #' @param method_cond a character value indicating the method for conditional analysis. #' \code{optimal} refers to regressing residuals from the null model on \code{known_loci} #' as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals; @@ -53,7 +55,8 @@ #' @export Gene_Centric_Coding_cond <- function(chr,gene_name,category=c("plof","plof_ds","missense","disruptive_missense","synonymous","ptv","ptv_ds"), - genofile,obj_nullmodel,known_loci=NULL,rare_maf_cutoff=0.01,rv_num_cutoff=2,rv_num_cutoff_max=1e9, + genofile,obj_nullmodel,known_loci=NULL,rare_maf_cutoff=0.01,rv_num_cutoff=2, + rv_num_cutoff_max=1e9,rv_num_cutoff_max_prefilter=1e9, method_cond=c("optimal","naive"), QC_label="annotation/filter",variant_type=c("SNV","Indel","variant"),geno_missing_imputation=c("mean","minor"), Annotation_dir="annotation/info/FunctionalAnnotation",Annotation_name_catalog, @@ -74,7 +77,8 @@ Gene_Centric_Coding_cond <- function(chr,gene_name,category=c("plof","plof_ds"," if(category=="plof") { results <- plof_cond(chr,gene_name,genofile,obj_nullmodel,genes, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, method_cond=method_cond, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, @@ -84,7 +88,8 @@ Gene_Centric_Coding_cond <- function(chr,gene_name,category=c("plof","plof_ds"," if(category=="plof_ds") { results <- plof_ds_cond(chr,gene_name,genofile,obj_nullmodel,genes, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, method_cond=method_cond, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, @@ -94,7 +99,8 @@ Gene_Centric_Coding_cond <- function(chr,gene_name,category=c("plof","plof_ds"," if(category=="missense") { results <- missense_cond(chr,gene_name,genofile,obj_nullmodel,genes, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, method_cond=method_cond, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, @@ -104,7 +110,8 @@ Gene_Centric_Coding_cond <- function(chr,gene_name,category=c("plof","plof_ds"," if(category=="disruptive_missense") { results <- disruptive_missense_cond(chr,gene_name,genofile,obj_nullmodel,genes, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, method_cond=method_cond, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, @@ -114,7 +121,8 @@ Gene_Centric_Coding_cond <- function(chr,gene_name,category=c("plof","plof_ds"," if(category=="synonymous") { results <- synonymous_cond(chr,gene_name,genofile,obj_nullmodel,genes, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, method_cond=method_cond, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, @@ -124,7 +132,8 @@ Gene_Centric_Coding_cond <- function(chr,gene_name,category=c("plof","plof_ds"," if(category=="ptv") { results <- ptv_cond(chr,gene_name,genofile,obj_nullmodel,genes, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, method_cond=method_cond, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, @@ -134,7 +143,8 @@ Gene_Centric_Coding_cond <- function(chr,gene_name,category=c("plof","plof_ds"," if(category=="ptv_ds") { results <- ptv_ds_cond(chr,gene_name,genofile,obj_nullmodel,genes, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, method_cond=method_cond, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, diff --git a/R/Gene_Centric_Coding_cond_spa.R b/R/Gene_Centric_Coding_cond_spa.R index f23ec2a..b91bc5c 100644 --- a/R/Gene_Centric_Coding_cond_spa.R +++ b/R/Gene_Centric_Coding_cond_spa.R @@ -23,11 +23,8 @@ #' a given variant-set (default = 2). #' @param rv_num_cutoff_max the cutoff of maximum number of variants of analyzing #' a given variant-set (default = 1e+09). -#' @param method_cond a character value indicating the method for conditional analysis. -#' \code{optimal} refers to regressing residuals from the null model on \code{known_loci} -#' as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals; -#' \code{naive} refers to regressing residuals from the null model on \code{known_loci} -#' and taking the residuals (default = \code{optimal}). +#' @param rv_num_cutoff_max_prefilter the cutoff of maximum number of variants +#' before extracting the genotype matrix (default = 1e+09). #' @param QC_label channel name of the QC label in the GDS/aGDS file (default = "annotation/filter"). #' @param variant_type type of variant included in the analysis. Choices include "SNV", "Indel", or "variant" (default = "SNV"). #' @param geno_missing_imputation method of handling missing genotypes. Either "mean" or "minor" (default = "mean"). @@ -52,8 +49,8 @@ #' @export Gene_Centric_Coding_cond_spa <- function(chr,gene_name,category=c("plof","plof_ds","missense","disruptive_missense","synonymous","ptv","ptv_ds"), - genofile,obj_nullmodel,known_loci=NULL,rare_maf_cutoff=0.01,rv_num_cutoff=2,rv_num_cutoff_max=1e9, - method_cond=c("optimal","naive"), + genofile,obj_nullmodel,known_loci=NULL,rare_maf_cutoff=0.01,rv_num_cutoff=2, + rv_num_cutoff_max=1e9,rv_num_cutoff_max_prefilter=1e9, QC_label="annotation/filter",variant_type=c("SNV","Indel","variant"),geno_missing_imputation=c("mean","minor"), Annotation_dir="annotation/info/FunctionalAnnotation",Annotation_name_catalog, Use_annotation_weights=c(TRUE,FALSE),Annotation_name=NULL, @@ -61,7 +58,6 @@ Gene_Centric_Coding_cond_spa <- function(chr,gene_name,category=c("plof","plof_d ## evaluate choices category <- match.arg(category) - method_cond <- match.arg(method_cond) variant_type <- match.arg(variant_type) geno_missing_imputation <- match.arg(geno_missing_imputation) @@ -74,7 +70,8 @@ Gene_Centric_Coding_cond_spa <- function(chr,gene_name,category=c("plof","plof_d if(category=="plof") { results <- plof_cond_spa(chr,gene_name,genofile,obj_nullmodel,genes, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -84,7 +81,8 @@ Gene_Centric_Coding_cond_spa <- function(chr,gene_name,category=c("plof","plof_d if(category=="plof_ds") { results <- plof_ds_cond_spa(chr,gene_name,genofile,obj_nullmodel,genes, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -94,7 +92,8 @@ Gene_Centric_Coding_cond_spa <- function(chr,gene_name,category=c("plof","plof_d if(category=="missense") { results <- missense_cond_spa(chr,gene_name,genofile,obj_nullmodel,genes, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -104,7 +103,8 @@ Gene_Centric_Coding_cond_spa <- function(chr,gene_name,category=c("plof","plof_d if(category=="disruptive_missense") { results <- disruptive_missense_cond_spa(chr,gene_name,genofile,obj_nullmodel,genes, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -114,7 +114,8 @@ Gene_Centric_Coding_cond_spa <- function(chr,gene_name,category=c("plof","plof_d if(category=="synonymous") { results <- synonymous_cond_spa(chr,gene_name,genofile,obj_nullmodel,genes, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -124,7 +125,8 @@ Gene_Centric_Coding_cond_spa <- function(chr,gene_name,category=c("plof","plof_d if(category=="ptv") { results <- ptv_cond_spa(chr,gene_name,genofile,obj_nullmodel,genes, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -134,7 +136,8 @@ Gene_Centric_Coding_cond_spa <- function(chr,gene_name,category=c("plof","plof_d if(category=="ptv_ds") { results <- ptv_ds_cond_spa(chr,gene_name,genofile,obj_nullmodel,genes, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, diff --git a/R/Gene_Centric_Noncoding.R b/R/Gene_Centric_Noncoding.R index fa92537..ab2c241 100644 --- a/R/Gene_Centric_Noncoding.R +++ b/R/Gene_Centric_Noncoding.R @@ -24,6 +24,8 @@ #' a given variant-set (default = 2). #' @param rv_num_cutoff_max the cutoff of maximum number of variants of analyzing #' a given variant-set (default = 1e+09). +#' @param rv_num_cutoff_max_prefilter the cutoff of maximum number of variants +#' before extracting the genotype matrix (default = 1e+09). #' @param QC_label channel name of the QC label in the GDS/aGDS file (default = "annotation/filter"). #' @param variant_type type of variant included in the analysis. Choices include "SNV", "Indel", or "variant" (default = "SNV"). #' @param geno_missing_imputation method of handling missing genotypes. Either "mean" or "minor" (default = "mean"). @@ -46,7 +48,7 @@ #' @export Gene_Centric_Noncoding <- function(chr,gene_name,category=c("all_categories","downstream","upstream","UTR","promoter_CAGE","promoter_DHS","enhancer_CAGE","enhancer_DHS"), - genofile,obj_nullmodel,rare_maf_cutoff=0.01,rv_num_cutoff=2,rv_num_cutoff_max=1e9, + genofile,obj_nullmodel,rare_maf_cutoff=0.01,rv_num_cutoff=2,rv_num_cutoff_max=1e9,rv_num_cutoff_max_prefilter=1e9, QC_label="annotation/filter",variant_type=c("SNV","Indel","variant"),geno_missing_imputation=c("mean","minor"), Annotation_dir="annotation/info/FunctionalAnnotation",Annotation_name_catalog, Use_annotation_weights=c(TRUE,FALSE),Annotation_name=NULL, @@ -60,7 +62,8 @@ Gene_Centric_Noncoding <- function(chr,gene_name,category=c("all_categories","do if(category=="all_categories") { results <- noncoding(chr,gene_name,genofile,obj_nullmodel, - rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -70,7 +73,8 @@ Gene_Centric_Noncoding <- function(chr,gene_name,category=c("all_categories","do if(category=="downstream") { results <- downstream(chr,gene_name,genofile,obj_nullmodel, - rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -80,7 +84,8 @@ Gene_Centric_Noncoding <- function(chr,gene_name,category=c("all_categories","do if(category=="upstream") { results <- upstream(chr,gene_name,genofile,obj_nullmodel, - rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -90,7 +95,8 @@ Gene_Centric_Noncoding <- function(chr,gene_name,category=c("all_categories","do if(category=="UTR") { results <- UTR(chr,gene_name,genofile,obj_nullmodel, - rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -100,7 +106,8 @@ Gene_Centric_Noncoding <- function(chr,gene_name,category=c("all_categories","do if(category=="promoter_CAGE") { results <- promoter_CAGE(chr,gene_name,genofile,obj_nullmodel, - rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -110,7 +117,8 @@ Gene_Centric_Noncoding <- function(chr,gene_name,category=c("all_categories","do if(category=="promoter_DHS") { results <- promoter_DHS(chr,gene_name,genofile,obj_nullmodel, - rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -120,7 +128,8 @@ Gene_Centric_Noncoding <- function(chr,gene_name,category=c("all_categories","do if(category=="enhancer_CAGE") { results <- enhancer_CAGE(chr,gene_name,genofile,obj_nullmodel, - rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -130,7 +139,8 @@ Gene_Centric_Noncoding <- function(chr,gene_name,category=c("all_categories","do if(category=="enhancer_DHS") { results <- enhancer_DHS(chr,gene_name,genofile,obj_nullmodel, - rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, diff --git a/R/Gene_Centric_Noncoding_cond.R b/R/Gene_Centric_Noncoding_cond.R index b681930..8ebd27b 100644 --- a/R/Gene_Centric_Noncoding_cond.R +++ b/R/Gene_Centric_Noncoding_cond.R @@ -26,6 +26,8 @@ #' a given variant-set (default = 2). #' @param rv_num_cutoff_max the cutoff of maximum number of variants of analyzing #' a given variant-set (default = 1e+09). +#' @param rv_num_cutoff_max_prefilter the cutoff of maximum number of variants +#' before extracting the genotype matrix (default = 1e+09). #' @param method_cond a character value indicating the method for conditional analysis. #' \code{optimal} refers to regressing residuals from the null model on \code{known_loci} #' as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals; @@ -53,7 +55,8 @@ #' @export Gene_Centric_Noncoding_cond <- function(chr,gene_name,category=c("downstream","upstream","UTR","promoter_CAGE","promoter_DHS","enhancer_CAGE","enhancer_DHS"), - genofile,obj_nullmodel,known_loci=NULL,rare_maf_cutoff=0.01,rv_num_cutoff=2,rv_num_cutoff_max=1e9, + genofile,obj_nullmodel,known_loci=NULL,rare_maf_cutoff=0.01,rv_num_cutoff=2, + rv_num_cutoff_max=1e9,rv_num_cutoff_max_prefilter=1e9, method_cond=c("optimal","naive"), QC_label="annotation/filter",variant_type=c("SNV","Indel","variant"),geno_missing_imputation=c("mean","minor"), Annotation_dir="annotation/info/FunctionalAnnotation",Annotation_name_catalog, @@ -73,7 +76,8 @@ Gene_Centric_Noncoding_cond <- function(chr,gene_name,category=c("downstream","u if(category=="downstream") { results <- downstream_cond(chr,gene_name,genofile,obj_nullmodel, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, method_cond=method_cond, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, @@ -83,7 +87,8 @@ Gene_Centric_Noncoding_cond <- function(chr,gene_name,category=c("downstream","u if(category=="upstream") { results <- upstream_cond(chr,gene_name,genofile,obj_nullmodel, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, method_cond=method_cond, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, @@ -93,7 +98,8 @@ Gene_Centric_Noncoding_cond <- function(chr,gene_name,category=c("downstream","u if(category=="UTR") { results <- UTR_cond(chr,gene_name,genofile,obj_nullmodel, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, method_cond=method_cond, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, @@ -103,7 +109,8 @@ Gene_Centric_Noncoding_cond <- function(chr,gene_name,category=c("downstream","u if(category=="promoter_CAGE") { results <- promoter_CAGE_cond(chr,gene_name,genofile,obj_nullmodel, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, method_cond=method_cond, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, @@ -113,7 +120,8 @@ Gene_Centric_Noncoding_cond <- function(chr,gene_name,category=c("downstream","u if(category=="promoter_DHS") { results <- promoter_DHS_cond(chr,gene_name,genofile,obj_nullmodel, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, method_cond=method_cond, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, @@ -123,7 +131,8 @@ Gene_Centric_Noncoding_cond <- function(chr,gene_name,category=c("downstream","u if(category=="enhancer_CAGE") { results <- enhancer_CAGE_cond(chr,gene_name,genofile,obj_nullmodel, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, method_cond=method_cond, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, @@ -133,7 +142,8 @@ Gene_Centric_Noncoding_cond <- function(chr,gene_name,category=c("downstream","u if(category=="enhancer_DHS") { results <- enhancer_DHS_cond(chr,gene_name,genofile,obj_nullmodel, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, method_cond=method_cond, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, diff --git a/R/Gene_Centric_Noncoding_cond_spa.R b/R/Gene_Centric_Noncoding_cond_spa.R index 3172fac..1f3800b 100644 --- a/R/Gene_Centric_Noncoding_cond_spa.R +++ b/R/Gene_Centric_Noncoding_cond_spa.R @@ -23,11 +23,8 @@ #' a given variant-set (default = 2). #' @param rv_num_cutoff_max the cutoff of maximum number of variants of analyzing #' a given variant-set (default = 1e+09). -#' @param method_cond a character value indicating the method for conditional analysis. -#' \code{optimal} refers to regressing residuals from the null model on \code{known_loci} -#' as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals; -#' \code{naive} refers to regressing residuals from the null model on \code{known_loci} -#' and taking the residuals (default = \code{optimal}). +#' @param rv_num_cutoff_max_prefilter the cutoff of maximum number of variants +#' before extracting the genotype matrix (default = 1e+09). #' @param QC_label channel name of the QC label in the GDS/aGDS file (default = "annotation/filter"). #' @param variant_type type of variant included in the analysis. Choices include "SNV", "Indel", or "variant" (default = "SNV"). #' @param geno_missing_imputation method of handling missing genotypes. Either "mean" or "minor" (default = "mean"). @@ -52,8 +49,8 @@ #' @export Gene_Centric_Noncoding_cond_spa <- function(chr,gene_name,category=c("downstream","upstream","UTR","promoter_CAGE","promoter_DHS","enhancer_CAGE","enhancer_DHS"), - genofile,obj_nullmodel,known_loci=NULL,rare_maf_cutoff=0.01,rv_num_cutoff=2,rv_num_cutoff_max=1e9, - method_cond=c("optimal","naive"), + genofile,obj_nullmodel,known_loci=NULL,rare_maf_cutoff=0.01, + rv_num_cutoff=2,rv_num_cutoff_max=1e9,rv_num_cutoff_max_prefilter=1e9, QC_label="annotation/filter",variant_type=c("SNV","Indel","variant"),geno_missing_imputation=c("mean","minor"), Annotation_dir="annotation/info/FunctionalAnnotation",Annotation_name_catalog, Use_annotation_weights=c(TRUE,FALSE),Annotation_name=NULL, @@ -61,7 +58,6 @@ Gene_Centric_Noncoding_cond_spa <- function(chr,gene_name,category=c("downstream ## evaluate choices category <- match.arg(category) - method_cond <- match.arg(method_cond) variant_type <- match.arg(variant_type) geno_missing_imputation <- match.arg(geno_missing_imputation) @@ -73,7 +69,8 @@ Gene_Centric_Noncoding_cond_spa <- function(chr,gene_name,category=c("downstream if(category=="downstream") { results <- downstream_cond_spa(chr,gene_name,genofile,obj_nullmodel, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -83,7 +80,8 @@ Gene_Centric_Noncoding_cond_spa <- function(chr,gene_name,category=c("downstream if(category=="upstream") { results <- upstream_cond_spa(chr,gene_name,genofile,obj_nullmodel, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -93,7 +91,8 @@ Gene_Centric_Noncoding_cond_spa <- function(chr,gene_name,category=c("downstream if(category=="UTR") { results <- UTR_cond_spa(chr,gene_name,genofile,obj_nullmodel, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -103,7 +102,8 @@ Gene_Centric_Noncoding_cond_spa <- function(chr,gene_name,category=c("downstream if(category=="promoter_CAGE") { results <- promoter_CAGE_cond_spa(chr,gene_name,genofile,obj_nullmodel, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -113,7 +113,8 @@ Gene_Centric_Noncoding_cond_spa <- function(chr,gene_name,category=c("downstream if(category=="promoter_DHS") { results <- promoter_DHS_cond_spa(chr,gene_name,genofile,obj_nullmodel, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -123,7 +124,8 @@ Gene_Centric_Noncoding_cond_spa <- function(chr,gene_name,category=c("downstream if(category=="enhancer_CAGE") { results <- enhancer_CAGE_cond_spa(chr,gene_name,genofile,obj_nullmodel, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -133,7 +135,8 @@ Gene_Centric_Noncoding_cond_spa <- function(chr,gene_name,category=c("downstream if(category=="enhancer_DHS") { results <- enhancer_DHS_cond_spa(chr,gene_name,genofile,obj_nullmodel, - known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + known_loci,rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, diff --git a/R/Sliding_Window.R b/R/Sliding_Window.R index ca055d2..a0cc312 100644 --- a/R/Sliding_Window.R +++ b/R/Sliding_Window.R @@ -26,6 +26,8 @@ #' a given variant-set (default = 2). #' @param rv_num_cutoff_max the cutoff of maximum number of variants of analyzing #' a given variant-set (default = 1e+09). +#' @param rv_num_cutoff_max_prefilter the cutoff of maximum number of variants +#' before extracting the genotype matrix (default = 1e+09). #' @param QC_label channel name of the QC label in the GDS/aGDS file (default = "annotation/filter"). #' @param variant_type type of variant included in the analysis. Choices include "SNV", "Indel", or "variant" (default = "SNV"). #' @param geno_missing_imputation method of handling missing genotypes. Either "mean" or "minor" (default = "mean"). @@ -48,7 +50,8 @@ #' @export Sliding_Window <- function(chr,start_loc,end_loc,sliding_window_length=2000,type=c("single","multiple"), - genofile,obj_nullmodel,rare_maf_cutoff=0.01,rv_num_cutoff=2,rv_num_cutoff_max=1e9, + genofile,obj_nullmodel,rare_maf_cutoff=0.01,rv_num_cutoff=2, + rv_num_cutoff_max=1e9,rv_num_cutoff_max_prefilter=1e9, QC_label="annotation/filter",variant_type=c("SNV","Indel","variant"),geno_missing_imputation=c("mean","minor"), Annotation_dir="annotation/info/FunctionalAnnotation",Annotation_name_catalog, Use_annotation_weights=c(TRUE,FALSE),Annotation_name=NULL, @@ -63,7 +66,8 @@ Sliding_Window <- function(chr,start_loc,end_loc,sliding_window_length=2000,type { results <- Sliding_Window_Single(chr=chr,start_loc=start_loc,end_loc=end_loc, genofile=genofile,obj_nullmodel=obj_nullmodel, - rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, @@ -75,7 +79,8 @@ Sliding_Window <- function(chr,start_loc,end_loc,sliding_window_length=2000,type results <- Sliding_Window_Multiple(chr=chr,start_loc=start_loc,end_loc=end_loc, sliding_window_length=sliding_window_length, genofile=genofile,obj_nullmodel=obj_nullmodel, - rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff,rv_num_cutoff_max=rv_num_cutoff_max, + rare_maf_cutoff=rare_maf_cutoff,rv_num_cutoff=rv_num_cutoff, + rv_num_cutoff_max=rv_num_cutoff_max,rv_num_cutoff_max_prefilter=rv_num_cutoff_max_prefilter, QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog, Use_annotation_weights=Use_annotation_weights,Annotation_name=Annotation_name, diff --git a/R/Sliding_Window_Multiple.R b/R/Sliding_Window_Multiple.R index de95311..f6a1f01 100644 --- a/R/Sliding_Window_Multiple.R +++ b/R/Sliding_Window_Multiple.R @@ -1,4 +1,5 @@ -Sliding_Window_Multiple <- function(chr,start_loc,end_loc,sliding_window_length=2000,genofile,obj_nullmodel,rare_maf_cutoff=0.01,rv_num_cutoff=2,rv_num_cutoff_max=1e9, +Sliding_Window_Multiple <- function(chr,start_loc,end_loc,sliding_window_length=2000,genofile,obj_nullmodel,rare_maf_cutoff=0.01,rv_num_cutoff=2, + rv_num_cutoff_max=1e9,rv_num_cutoff_max_prefilter=1e9, QC_label="annotation/filter",variant_type=c("SNV","Indel","variant"),geno_missing_imputation=c("mean","minor"), Annotation_dir="annotation/info/FunctionalAnnotation",Annotation_name_catalog, Use_annotation_weights=c(TRUE,FALSE),Annotation_name=NULL, @@ -65,8 +66,12 @@ Sliding_Window_Multiple <- function(chr,start_loc,end_loc,sliding_window_length= if(sum(is.in)>=2) { ## Genotype - Geno <- seqGetData(genofile, "$dosage") - Geno <- Geno[id.genotype.match,,drop=FALSE] + Geno <- NULL + if(length(seqGetData(genofile, "variant.id"))=2) { ## Genotype - Geno <- seqGetData(genofile, "$dosage") - Geno <- Geno[id.genotype.match,,drop=FALSE] + Geno <- NULL + if(length(seqGetData(genofile, "variant.id"))=2) { ## Genotype - Geno <- seqGetData(genofile, "$dosage") - Geno <- Geno[id.genotype.match,,drop=FALSE] + Geno <- NULL + if(length(seqGetData(genofile, "variant.id"))=2) { ## Genotype - Geno <- seqGetData(genofile, "$dosage") - Geno <- Geno[id.genotype.match,,drop=FALSE] + Geno <- NULL + if(length(seqGetData(genofile, "variant.id"))**STAARpipeline** user ma ## Data Availability The whole-genome functional annotation data assembled from a variety of sources and the precomputed annotation principal components are available at the [Functional Annotation of Variant - Online Resource (FAVOR)](https://favor.genohub.org) site and [FAVOR Essential Database](https://doi.org/10.7910/DVN/1VGTJI). ## Version -The current version is 0.9.7.2 (November 14, 2024). +The current version is 0.9.7.2 (November 17, 2024). ## Citation If you use **STAARpipeline** and **STAARpipelineSummary** for your work, please cite: diff --git a/docs/STAARpipeline_manual.pdf b/docs/STAARpipeline_manual.pdf index 21a4a27..c534606 100644 Binary files a/docs/STAARpipeline_manual.pdf and b/docs/STAARpipeline_manual.pdf differ diff --git a/man/Gene_Centric_Coding.Rd b/man/Gene_Centric_Coding.Rd index a39a754..d91582d 100644 --- a/man/Gene_Centric_Coding.Rd +++ b/man/Gene_Centric_Coding.Rd @@ -14,6 +14,7 @@ Gene_Centric_Coding( rare_maf_cutoff = 0.01, rv_num_cutoff = 2, rv_num_cutoff_max = 1e+09, + rv_num_cutoff_max_prefilter = 1e+09, QC_label = "annotation/filter", variant_type = c("SNV", "Indel", "variant"), geno_missing_imputation = c("mean", "minor"), @@ -49,6 +50,9 @@ a given variant-set (default = 2).} \item{rv_num_cutoff_max}{the cutoff of maximum number of variants of analyzing a given variant-set (default = 1e+09).} +\item{rv_num_cutoff_max_prefilter}{the cutoff of maximum number of variants +before extracting the genotype matrix (default = 1e+09).} + \item{QC_label}{channel name of the QC label in the GDS/aGDS file (default = "annotation/filter").} \item{variant_type}{type of variant included in the analysis. Choices include "SNV", "Indel", or "variant" (default = "SNV").} diff --git a/man/Gene_Centric_Coding_cond.Rd b/man/Gene_Centric_Coding_cond.Rd index d308b09..65a76c6 100644 --- a/man/Gene_Centric_Coding_cond.Rd +++ b/man/Gene_Centric_Coding_cond.Rd @@ -15,6 +15,7 @@ Gene_Centric_Coding_cond( rare_maf_cutoff = 0.01, rv_num_cutoff = 2, rv_num_cutoff_max = 1e+09, + rv_num_cutoff_max_prefilter = 1e+09, method_cond = c("optimal", "naive"), QC_label = "annotation/filter", variant_type = c("SNV", "Indel", "variant"), @@ -51,6 +52,9 @@ a given variant-set (default = 2).} \item{rv_num_cutoff_max}{the cutoff of maximum number of variants of analyzing a given variant-set (default = 1e+09).} +\item{rv_num_cutoff_max_prefilter}{the cutoff of maximum number of variants +before extracting the genotype matrix (default = 1e+09).} + \item{method_cond}{a character value indicating the method for conditional analysis. \code{optimal} refers to regressing residuals from the null model on \code{known_loci} as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals; diff --git a/man/Gene_Centric_Coding_cond_spa.Rd b/man/Gene_Centric_Coding_cond_spa.Rd index 810786d..d3570c4 100644 --- a/man/Gene_Centric_Coding_cond_spa.Rd +++ b/man/Gene_Centric_Coding_cond_spa.Rd @@ -15,7 +15,7 @@ Gene_Centric_Coding_cond_spa( rare_maf_cutoff = 0.01, rv_num_cutoff = 2, rv_num_cutoff_max = 1e+09, - method_cond = c("optimal", "naive"), + rv_num_cutoff_max_prefilter = 1e+09, QC_label = "annotation/filter", variant_type = c("SNV", "Indel", "variant"), geno_missing_imputation = c("mean", "minor"), @@ -53,11 +53,8 @@ a given variant-set (default = 2).} \item{rv_num_cutoff_max}{the cutoff of maximum number of variants of analyzing a given variant-set (default = 1e+09).} -\item{method_cond}{a character value indicating the method for conditional analysis. -\code{optimal} refers to regressing residuals from the null model on \code{known_loci} -as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals; -\code{naive} refers to regressing residuals from the null model on \code{known_loci} -and taking the residuals (default = \code{optimal}).} +\item{rv_num_cutoff_max_prefilter}{the cutoff of maximum number of variants +before extracting the genotype matrix (default = 1e+09).} \item{QC_label}{channel name of the QC label in the GDS/aGDS file (default = "annotation/filter").} diff --git a/man/Gene_Centric_Noncoding.Rd b/man/Gene_Centric_Noncoding.Rd index 463defe..db6d009 100644 --- a/man/Gene_Centric_Noncoding.Rd +++ b/man/Gene_Centric_Noncoding.Rd @@ -14,6 +14,7 @@ Gene_Centric_Noncoding( rare_maf_cutoff = 0.01, rv_num_cutoff = 2, rv_num_cutoff_max = 1e+09, + rv_num_cutoff_max_prefilter = 1e+09, QC_label = "annotation/filter", variant_type = c("SNV", "Indel", "variant"), geno_missing_imputation = c("mean", "minor"), @@ -48,6 +49,9 @@ a given variant-set (default = 2).} \item{rv_num_cutoff_max}{the cutoff of maximum number of variants of analyzing a given variant-set (default = 1e+09).} +\item{rv_num_cutoff_max_prefilter}{the cutoff of maximum number of variants +before extracting the genotype matrix (default = 1e+09).} + \item{QC_label}{channel name of the QC label in the GDS/aGDS file (default = "annotation/filter").} \item{variant_type}{type of variant included in the analysis. Choices include "SNV", "Indel", or "variant" (default = "SNV").} diff --git a/man/Gene_Centric_Noncoding_cond.Rd b/man/Gene_Centric_Noncoding_cond.Rd index 92a31f7..3c4c296 100644 --- a/man/Gene_Centric_Noncoding_cond.Rd +++ b/man/Gene_Centric_Noncoding_cond.Rd @@ -15,6 +15,7 @@ Gene_Centric_Noncoding_cond( rare_maf_cutoff = 0.01, rv_num_cutoff = 2, rv_num_cutoff_max = 1e+09, + rv_num_cutoff_max_prefilter = 1e+09, method_cond = c("optimal", "naive"), QC_label = "annotation/filter", variant_type = c("SNV", "Indel", "variant"), @@ -51,6 +52,9 @@ a given variant-set (default = 2).} \item{rv_num_cutoff_max}{the cutoff of maximum number of variants of analyzing a given variant-set (default = 1e+09).} +\item{rv_num_cutoff_max_prefilter}{the cutoff of maximum number of variants +before extracting the genotype matrix (default = 1e+09).} + \item{method_cond}{a character value indicating the method for conditional analysis. \code{optimal} refers to regressing residuals from the null model on \code{known_loci} as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals; diff --git a/man/Gene_Centric_Noncoding_cond_spa.Rd b/man/Gene_Centric_Noncoding_cond_spa.Rd index b2a0fa3..8754a99 100644 --- a/man/Gene_Centric_Noncoding_cond_spa.Rd +++ b/man/Gene_Centric_Noncoding_cond_spa.Rd @@ -15,7 +15,7 @@ Gene_Centric_Noncoding_cond_spa( rare_maf_cutoff = 0.01, rv_num_cutoff = 2, rv_num_cutoff_max = 1e+09, - method_cond = c("optimal", "naive"), + rv_num_cutoff_max_prefilter = 1e+09, QC_label = "annotation/filter", variant_type = c("SNV", "Indel", "variant"), geno_missing_imputation = c("mean", "minor"), @@ -53,11 +53,8 @@ a given variant-set (default = 2).} \item{rv_num_cutoff_max}{the cutoff of maximum number of variants of analyzing a given variant-set (default = 1e+09).} -\item{method_cond}{a character value indicating the method for conditional analysis. -\code{optimal} refers to regressing residuals from the null model on \code{known_loci} -as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals; -\code{naive} refers to regressing residuals from the null model on \code{known_loci} -and taking the residuals (default = \code{optimal}).} +\item{rv_num_cutoff_max_prefilter}{the cutoff of maximum number of variants +before extracting the genotype matrix (default = 1e+09).} \item{QC_label}{channel name of the QC label in the GDS/aGDS file (default = "annotation/filter").} diff --git a/man/Sliding_Window.Rd b/man/Sliding_Window.Rd index 493877c..ac7f87c 100644 --- a/man/Sliding_Window.Rd +++ b/man/Sliding_Window.Rd @@ -15,6 +15,7 @@ Sliding_Window( rare_maf_cutoff = 0.01, rv_num_cutoff = 2, rv_num_cutoff_max = 1e+09, + rv_num_cutoff_max_prefilter = 1e+09, QC_label = "annotation/filter", variant_type = c("SNV", "Indel", "variant"), geno_missing_imputation = c("mean", "minor"), @@ -53,6 +54,9 @@ a given variant-set (default = 2).} \item{rv_num_cutoff_max}{the cutoff of maximum number of variants of analyzing a given variant-set (default = 1e+09).} +\item{rv_num_cutoff_max_prefilter}{the cutoff of maximum number of variants +before extracting the genotype matrix (default = 1e+09).} + \item{QC_label}{channel name of the QC label in the GDS/aGDS file (default = "annotation/filter").} \item{variant_type}{type of variant included in the analysis. Choices include "SNV", "Indel", or "variant" (default = "SNV").} diff --git a/man/Sliding_Window_cond.Rd b/man/Sliding_Window_cond.Rd index 5ff8cc2..2b7d723 100644 --- a/man/Sliding_Window_cond.Rd +++ b/man/Sliding_Window_cond.Rd @@ -14,6 +14,7 @@ Sliding_Window_cond( rare_maf_cutoff = 0.01, rv_num_cutoff = 2, rv_num_cutoff_max = 1e+09, + rv_num_cutoff_max_prefilter = 1e+09, method_cond = c("optimal", "naive"), QC_label = "annotation/filter", variant_type = c("SNV", "Indel", "variant"), @@ -49,6 +50,9 @@ a given variant-set (default = 2).} \item{rv_num_cutoff_max}{the cutoff of maximum number of variants of analyzing a given variant-set (default = 1e+09).} +\item{rv_num_cutoff_max_prefilter}{the cutoff of maximum number of variants +before extracting the genotype matrix (default = 1e+09).} + \item{method_cond}{a character value indicating the method for conditional analysis. \code{optimal} refers to regressing residuals from the null model on \code{known_loci} as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals; diff --git a/man/Sliding_Window_cond_spa.Rd b/man/Sliding_Window_cond_spa.Rd index 3161ffc..25234df 100644 --- a/man/Sliding_Window_cond_spa.Rd +++ b/man/Sliding_Window_cond_spa.Rd @@ -14,6 +14,7 @@ Sliding_Window_cond_spa( rare_maf_cutoff = 0.01, rv_num_cutoff = 2, rv_num_cutoff_max = 1e+09, + rv_num_cutoff_max_prefilter = 1e+09, QC_label = "annotation/filter", variant_type = c("SNV", "Indel", "variant"), geno_missing_imputation = c("mean", "minor"), @@ -50,6 +51,9 @@ a given variant-set (default = 2).} \item{rv_num_cutoff_max}{the cutoff of maximum number of variants of analyzing a given variant-set (default = 1e+09).} +\item{rv_num_cutoff_max_prefilter}{the cutoff of maximum number of variants +before extracting the genotype matrix (default = 1e+09).} + \item{QC_label}{channel name of the QC label in the GDS/aGDS file (default = "annotation/filter").} \item{variant_type}{type of variant included in the analysis. Choices include "SNV", "Indel", or "variant" (default = "SNV").} diff --git a/man/ncRNA.Rd b/man/ncRNA.Rd index 77d7f6e..81b1010 100644 --- a/man/ncRNA.Rd +++ b/man/ncRNA.Rd @@ -12,6 +12,7 @@ ncRNA( rare_maf_cutoff = 0.01, rv_num_cutoff = 2, rv_num_cutoff_max = 1e+09, + rv_num_cutoff_max_prefilter = 1e+09, QC_label = "annotation/filter", variant_type = c("SNV", "Indel", "variant"), geno_missing_imputation = c("mean", "minor"), @@ -43,6 +44,9 @@ a given variant-set (default = 2).} \item{rv_num_cutoff_max}{the cutoff of maximum number of variants of analyzing a given variant-set (default = 1e+09).} +\item{rv_num_cutoff_max_prefilter}{the cutoff of maximum number of variants +before extracting the genotype matrix (default = 1e+09).} + \item{QC_label}{channel name of the QC label in the GDS/aGDS file (default = "annotation/filter").} \item{variant_type}{type of variant included in the analysis. Choices include "SNV", "Indel", or "variant" (default = "SNV").} diff --git a/man/ncRNA_cond.Rd b/man/ncRNA_cond.Rd index 27e2ba9..8179a61 100644 --- a/man/ncRNA_cond.Rd +++ b/man/ncRNA_cond.Rd @@ -13,6 +13,7 @@ ncRNA_cond( rare_maf_cutoff = 0.01, rv_num_cutoff = 2, rv_num_cutoff_max = 1e+09, + rv_num_cutoff_max_prefilter = 1e+09, method_cond = c("optimal", "naive"), QC_label = "annotation/filter", variant_type = c("SNV", "Indel", "variant"), @@ -46,6 +47,9 @@ a given variant-set (default = 2).} \item{rv_num_cutoff_max}{the cutoff of maximum number of variants of analyzing a given variant-set (default = 1e+09).} +\item{rv_num_cutoff_max_prefilter}{the cutoff of maximum number of variants +before extracting the genotype matrix (default = 1e+09).} + \item{method_cond}{a character value indicating the method for conditional analysis. \code{optimal} refers to regressing residuals from the null model on \code{known_loci} as well as all covariates used in fitting the null model (fully adjusted) and taking the residuals; diff --git a/man/ncRNA_cond_spa.Rd b/man/ncRNA_cond_spa.Rd index 5d96537..97c2cb0 100644 --- a/man/ncRNA_cond_spa.Rd +++ b/man/ncRNA_cond_spa.Rd @@ -13,6 +13,7 @@ ncRNA_cond_spa( rare_maf_cutoff = 0.01, rv_num_cutoff = 2, rv_num_cutoff_max = 1e+09, + rv_num_cutoff_max_prefilter = 1e+09, QC_label = "annotation/filter", variant_type = c("SNV", "Indel", "variant"), geno_missing_imputation = c("mean", "minor"), @@ -48,6 +49,9 @@ a given variant-set (default = 2).} \item{rv_num_cutoff_max}{the cutoff of maximum number of variants of analyzing a given variant-set (default = 1e+09).} +\item{rv_num_cutoff_max_prefilter}{the cutoff of maximum number of variants +before extracting the genotype matrix (default = 1e+09).} + \item{QC_label}{channel name of the QC label in the GDS/aGDS file (default = "annotation/filter").} \item{variant_type}{type of variant included in the analysis. Choices include "SNV", "Indel", or "variant" (default = "SNV").}