Skip to content

Commit

Permalink
neat log and cli
Browse files Browse the repository at this point in the history
  • Loading branch information
Zilong-Li committed Oct 3, 2024
1 parent 1679cca commit 2d5584c
Show file tree
Hide file tree
Showing 8 changed files with 28 additions and 30 deletions.
32 changes: 16 additions & 16 deletions src/Cmd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ Param::Param(int argc, char **argv) {
"\033[0m\n"};
OptionParser opts(copyr + "Main options");
auto help_opt = opts.add<Switch>("h", "help", "print all options including hidden advanced options");
auto svd_opt = opts.add<Value<uint>>("d", "svd", "svd method to be applied. default 2 is recommended for big data.\n"
auto svd_opt = opts.add<Value<uint>>("d", "svd", "SVD method to be applied. default 2 is recommended for big data.\n"
"0: the Implicitly Restarted Arnoldi Method (IRAM)\n"
"1: the Yu's single-pass Randomized SVD with power iterations\n"
"2: the proposed window-based Randomized SVD method\n"
"2: the accurate window-based Randomized SVD method (PCAone)\n"
"3: the full Singular Value Decomposition.", 2);
auto plinkfile = opts.add<Value<std::string>>("b", "bfile", "prefix to PLINK .bed/.bim/.fam files", "", &filein);
auto binfile = opts.add<Value<std::string>>("B", "binary", "path of binary file", "", &filein);
Expand All @@ -37,25 +37,25 @@ Param::Param(int argc, char **argv) {
auto beaglefile = opts.add<Value<std::string>>("G", "beagle", "path of BEAGLE file compressed by gzip", "", &filein);
opts.add<Value<uint>>("k", "pc", "top k principal components (PCs) to be calculated", k, &k);
opts.add<Value<double>>("m", "memory", "desired RAM usage in GB unit for out-of-core mode. default is in-core mode", memory, &memory);
opts.add<Value<uint>>("n", "threads", "number of threads to be used", threads, &threads);
opts.add<Value<uint>>("n", "threads", "the number of threads to be used", threads, &threads);
opts.add<Value<std::string>>("o", "out", "prefix to output files. default [pcaone]", fileout, &fileout);
opts.add<Value<uint>>("p", "maxp", "maximum number of power iterations for RSVD algorithm", maxp, &maxp);
opts.add<Switch>("S", "no-shuffle", "do not shuffle the data for --svd 2 if it is already permuted", &noshuffle);
opts.add<Switch>("S", "no-shuffle", "do not shuffle columns of data for --svd 2 (if not locally correlated)", &noshuffle);
opts.add<Switch>("v", "verbose", "verbose message output", &verbose);
opts.add<Value<uint>, Attribute::advanced>("w", "batches", "number of mini-batches to be used by PCAone --svd 2", bands, &bands);
opts.add<Value<uint>, Attribute::advanced>("w", "batches", "the number of mini-batches used by --svd 2", bands, &bands);
opts.add<Value<uint>>("C", "scale", "do scaling for input file.\n"
"0: do just centering\n"
"1: do log transformation eg. log(x+0.01) for RNA-seq data\n"
"2: do count per median log transformation (CPMED) for scRNAs",
scale, &scale);
opts.add<Switch>("", "emu", "uses EMU algorithm for genotype input with missingness", &emu);
opts.add<Switch>("", "pcangsd", "uses PCAngsd algorithm for genotype likelihood input", &pcangsd);
opts.add<Switch>("", "emu", "use EMU algorithm for genotype input with missingness", &emu);
opts.add<Switch>("", "pcangsd", "use PCAngsd algorithm for genotype likelihood input", &pcangsd);
opts.add<Value<double>>("", "maf", "exclude variants with MAF lower than this value", maf, &maf);
opts.add<Switch>("V", "printv", "output the right eigenvectors with suffix .loadings", &printv);
opts.add<Switch>("", "ld", "output a binary matrix for downstream LD related analysis", &ld);
auto bimfile = opts.add<Value<std::string>>("", "ld-bim", "variants information in plink bim file related to LD matrix", "", &filebim);
opts.add<Value<double>>("", "ld-r2", "r2 cutoff for LD-based pruning.", ld_r2, &ld_r2);
opts.add<Value<uint>>("", "ld-bp", "physical distance threshold in bases for LD (usually. 1000000)", ld_bp, &ld_bp);
opts.add<Value<double>>("", "ld-r2", "r2 cutoff for LD-based pruning. (usually 0.2)", ld_r2, &ld_r2);
opts.add<Value<uint>>("", "ld-bp", "physical distance threshold in bases for LD. (usually 1000000)", ld_bp, &ld_bp);
opts.add<Value<int>>("", "ld-stats", "statistics to calculate LD r2 for pairwise SNPs.\n"
"0: the ancestry adjusted, i.e. correlation between residuals\n"
"1: the standard, i.e. correlation between two alleles\n",
Expand All @@ -67,16 +67,16 @@ Param::Param(int argc, char **argv) {
opts.add<Value<double>>("", "clump-p2", "secondary significance threshold for clumped SNPs", clump_p2, &clump_p2);
opts.add<Value<double>>("", "clump-r2", "r2 cutoff for LD-based clumping", clump_r2, &clump_r2);
opts.add<Value<uint>>("", "clump-bp", "physical distance threshold in bases for clumping", clump_bp, &clump_bp);
opts.add<Switch, Attribute::advanced>("U", "printu", "output eigen vector of each epoch (for tests)", &printu);
opts.add<Value<uint>, Attribute::advanced>("", "M", "number of features (eg. SNPs) if already known", 0, &nsnps);
opts.add<Value<uint>, Attribute::advanced>("", "N", "number of samples if already known", 0, &nsamples);
opts.add<Switch, Attribute::advanced>("", "printu", "output eigen vector of each epoch (for tests)", &printu);
opts.add<Value<uint>, Attribute::advanced>("", "M", "the number of features (eg. SNPs) if already known", 0, &nsnps);
opts.add<Value<uint>, Attribute::advanced>("", "N", "the number of samples if already known", 0, &nsamples);
// opts.add<Switch, Attribute::advanced>("", "debug", "turn on debugging mode", &debug);
opts.add<Switch, Attribute::advanced>("", "haploid", "the plink format represents haploid data", &haploid);
opts.add<Value<uint>, Attribute::advanced>("", "buffer", "memory buffer in GB unit for permuting the data", buffer, &buffer);
opts.add<Value<uint>, Attribute::advanced>("", "imaxiter", "maximum number of IRAM interations", imaxiter, &imaxiter);
opts.add<Value<double>, Attribute::advanced>("", "itol", "tolerance for IRAM algorithm", itol, &itol);
opts.add<Value<uint>, Attribute::advanced>("", "ncv", "number of Lanzcos basis vectors for IRAM", ncv, &ncv);
opts.add<Value<uint>, Attribute::advanced>("", "oversamples", "number of oversampling columns for RSVD", oversamples, &oversamples);
opts.add<Value<uint>, Attribute::advanced>("", "imaxiter", "maximum number of IRAM iterations", imaxiter, &imaxiter);
opts.add<Value<double>, Attribute::advanced>("", "itol", "stopping tolerance for IRAM algorithm", itol, &itol);
opts.add<Value<uint>, Attribute::advanced>("", "ncv", "the number of Lanzcos basis vectors for IRAM", ncv, &ncv);
opts.add<Value<uint>, Attribute::advanced>("", "oversamples", "the number of oversampling columns for RSVD", oversamples, &oversamples);
opts.add<Value<uint>, Attribute::advanced>("", "rand", "the random matrix type. 0: uniform, 1: guassian", rand, &rand);
opts.add<Value<double>, Attribute::advanced>("", "tol-rsvd", "tolerance for RSVD algorithm", tol, &tol);
opts.add<Value<double>, Attribute::advanced>("", "tol-em", "tolerance for EMU/PCAngsd algorithm", tolem, &tolem);
Expand Down
2 changes: 1 addition & 1 deletion src/Cmd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class Param {
uint nsnps = 0;
uint k = 10;
uint maxp = 40; // maximum number of power iterations
uint threads = 10;
uint threads = 8;
uint blocksize = 0;
uint bands = 64;
bool pca = true;
Expand Down
6 changes: 3 additions & 3 deletions src/Data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ void Data::prepare() {
if (params.memory > 1.1 * m)
m = 0;
else
cao.warning("minimum RAM required is " + to_string(m) +
" GB. trying to allocate more RAM.");
cao.warn("minimum RAM required is " + to_string(m) +
" GB. trying to allocate more RAM.");
params.blocksize = (unsigned int)ceil(
(double)((m + params.memory) * 134217728 - 3 * nsamples * l -
2 * nsnps * l - 5 * nsnps) /
Expand All @@ -47,7 +47,7 @@ void Data::prepare() {
if (nblocks == 1) {
params.out_of_core = false;
read_all();
cao.warning("only one block exists. will run with in-core mode");
cao.warn("only one block exists. will run with in-core mode");
} else {
if (params.svd_t == SvdType::PCAoneAlg2 && params.pca) {
// decrease blocksize to fit the fancy halko
Expand Down
2 changes: 1 addition & 1 deletion src/FileBinary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ void FileBin::check_file_offset_first_var() {
} else {
ifs_bin.seekg(magic, std::ios_base::beg);
if (params.verbose)
cao.warning("make sure you are runing PCAone algorithm2");
cao.warn("confirm you are running the window-based RSVD (algorithm2)");
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/FileCsv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ void FileCsv::check_file_offset_first_var() {
} else {
rewind(zbuf.fin);
if (params.verbose)
cao.warning("make sure you are runing PCAone algorithm2");
cao.warn("confirm you are running the window-based RSVD (algorithm2)");
}
zbuf.lastRet = 1;
zbuf.buffCur = "";
Expand Down
6 changes: 3 additions & 3 deletions src/FilePlink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ void FileBed::check_file_offset_first_var() {
} else {
bed_ifstream.seekg(3, std::ios_base::beg);
if (params.verbose)
cao.warning("make sure you are running PCAone (algorithm2)");
cao.warn("confirm you are running the window-based RSVD (algorithm2)");
}
}

Expand Down Expand Up @@ -55,7 +55,7 @@ void FileBed::read_all() {
// should remove sites with F=0, 0.5, 1.0
// especially, F=0.5 means sample standard deviation is 0
if (F(i) == 0.0 || F(i) == 0.5 || F(i) == 1.0)
cao.warning("please do remove SNPs with AF=0, 0.5 and 1.0");
cao.warn("recommend to remove SNPs with AF=0, 0.5 and 1.0 first");
}

filter_snps_resize_F(); // filter and resize nsnps
Expand Down Expand Up @@ -156,7 +156,7 @@ void FileBed::read_block_initial(uint64 start_idx, uint64 stop_idx,
F(snp_idx) /= c;
}
if (F(snp_idx) == 0.0 || F(snp_idx) == 0.5 || F(snp_idx) == 1.0)
cao.warning("please do remove SNPs with AF=0, 0.5 and 1.0");
cao.warn("recommend to remove SNPs with AF=0, 0.5 and 1.0 first");
// do centering and initializing
centered_geno_lookup(1, snp_idx) = 0.0; // missing
centered_geno_lookup(0, snp_idx) = BED2GENO[0] - F(snp_idx); // minor hom
Expand Down
6 changes: 2 additions & 4 deletions src/Halko.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,7 @@ void FancyRsvdOpData::computeGandH(MyMatrix& G, MyMatrix& H, int pi) {
band = 1;
blocksize = (unsigned int)ceil((double)data->nsnps / data->params.bands);
if (blocksize < data->params.bands)
cao.warning(
"blocksize is smaller than window size. please consider IRAM "
"method.");
cao.warn("block size < window size. please consider the IRAM method");
// permute snps of G, see
// https://stackoverflow.com/questions/15858569/randomly-permute-rows-columns-of-a-matrix-with-eigen
if (data->params.perm) PCAone::permute_matrix(data->G, data->perm);
Expand Down Expand Up @@ -228,7 +226,7 @@ void FancyRsvdOpData::computeGandH(MyMatrix& G, MyMatrix& H, int pi) {
flip_Omg(Omg2, Omg);
H2.setZero();
} else if ((b + 1) == data->nblocks) {
cao.warning("shouldn't go here if the bands is proper, ie. 2^x");
cao.warn("shouldn't see this if mini-batches is 2^x");
H = H1 + H2;
Eigen::HouseholderQR<MyMatrix> qr(H);
Omg.noalias() =
Expand Down
2 changes: 1 addition & 1 deletion src/Logger.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class Logger {
}

template <class T>
void warning(const T &s) {
void warn(const T &s) {
if (is_screen)
std::cout << std::endl
<< "\x1B[33m"
Expand Down

0 comments on commit 2d5584c

Please sign in to comment.