Skip to content

Commit

Permalink
Merge pull request #4 from refresh-bio/splash2.1.4
Browse files Browse the repository at this point in the history
Splash2.1.4
  • Loading branch information
marekkokot authored Jun 16, 2023
2 parents 15719c9 + c353f3a commit a00034d
Show file tree
Hide file tree
Showing 33 changed files with 1,940 additions and 373 deletions.
48 changes: 28 additions & 20 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
all: satc satc_dump satc_merge sig_anch download_kmc nomad
all: satc satc_dump satc_merge sig_anch download_kmc splash supervised_test

NOMAD_LIBS_DIR = libs
SPLASH_LIBS_DIR = libs
LIBS_DIR = . #/usr/local/lib
INCLUDE_DIR= libs
MIMALLOC_INLUCDE_DIR = libs/mimalloc/include
Expand All @@ -14,14 +14,16 @@ COMMON_DIR=src/common
OUT_BIN_DIR=bin

CC = g++
CFLAGS = -fPIC -Wall -O3 -m64 -std=c++17 -mavx -pthread -I $(INCLUDE_DIR) -I $(MIMALLOC_INLUCDE_DIR) -fpermissive
CLINK = -lm -std=c++17 -lpthread -static-libstdc++ -lgfortran
CFLAGS = -fPIC -Wall -O3 -m64 -std=c++17 -pthread -I $(INCLUDE_DIR) -I $(MIMALLOC_INLUCDE_DIR) -fpermissive
CLINK = -lm -std=c++17 -lpthread -static-libstdc++

release: CLINK = -lm -std=c++17 -static -lgfortran -lquadmath -Wl,--whole-archive -lpthread -Wl,--no-whole-archive
release: CFLAGS = -fPIC -Wall -O3 -DNDEBUG -m64 -std=c++17 -mavx -pthread -I $(INCLUDE_DIR) -I $(MIMALLOC_INLUCDE_DIR) -fpermissive
MIMALLOC_OBJ=libs/mimalloc/mimalloc.o

release: CLINK = -lm -std=c++17 -static -Wl,--whole-archive -lpthread -Wl,--no-whole-archive
release: CFLAGS = -fPIC -Wall -O3 -DNDEBUG -m64 -std=c++17 -pthread -I $(INCLUDE_DIR) -I $(MIMALLOC_INLUCDE_DIR) -fpermissive
release: all

debug: CFLAGS = -fPIC -Wall -O0 -g -m64 -std=c++17 -mavx -pthread -I $(INCLUDE_DIR) -I $(MIMALLOC_INLUCDE_DIR) -fpermissive
debug: CFLAGS = -fPIC -Wall -O0 -g -m64 -std=c++17 -pthread -I $(INCLUDE_DIR) -I $(MIMALLOC_INLUCDE_DIR) -fpermissive
debug: all

ifdef MSVC # Avoid the MingW/Cygwin sections
Expand All @@ -47,18 +49,22 @@ prefix = /usr/local
# optional install location
exec_prefix = $(prefix)

$(MIMALLOC_OBJ):
$(CC) -DMI_MALLOC_OVERRIDE -O3 -DNDEBUG -fPIC -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden -ftls-model=initial-exec -fno-builtin-malloc -c -I libs/mimalloc/include libs/mimalloc/src/static.c -o $(MIMALLOC_OBJ)

%.o: %.cpp
$(CC) $(CFLAGS) -c $< -o $@

satc: $(OUT_BIN_DIR)/satc

$(OUT_BIN_DIR)/satc: $(SATC_MAIN_DIR)/satc.o \
$(SATC_MAIN_DIR)/kmc_api/kmc_file.o \
$(SATC_MAIN_DIR)/kmc_api/mmer.o \
$(SATC_MAIN_DIR)/kmc_api/kmer_api.o
$(COMMON_DIR)/kmc_api/kmc_file.o \
$(COMMON_DIR)/kmc_api/mmer.o \
$(COMMON_DIR)/kmc_api/kmer_api.o \
$(COMMON_DIR)/illumina_adapters_static.o
-mkdir -p $(OUT_BIN_DIR)
$(CC) -o $@ $^ \
$(NOMAD_LIBS_DIR)/$(LIB_ZSTD) \
$(SPLASH_LIBS_DIR)/$(LIB_ZSTD) \
$(CLINK)

satc_merge: $(OUT_BIN_DIR)/satc_merge
Expand All @@ -69,15 +75,15 @@ $(OUT_BIN_DIR)/satc_merge: $(SATC_MERGE_MAIN_DIR)/satc_merge.o \
$(SATC_MERGE_MAIN_DIR)/extra_stats.o
-mkdir -p $(OUT_BIN_DIR)
$(CC) -o $@ $^ \
$(NOMAD_LIBS_DIR)/$(LIB_ZSTD) \
$(SPLASH_LIBS_DIR)/$(LIB_ZSTD) \
$(CLINK)

satc_dump: $(OUT_BIN_DIR)/satc_dump

$(OUT_BIN_DIR)/satc_dump: $(SATC_DUMP_MAIN_DIR)/satc_dump.o
-mkdir -p $(OUT_BIN_DIR)
$(CC) -o $@ $^ \
$(NOMAD_LIBS_DIR)/$(LIB_ZSTD) \
$(SPLASH_LIBS_DIR)/$(LIB_ZSTD) \
$(CLINK)

sig_anch: $(OUT_BIN_DIR)/sig_anch
Expand All @@ -92,24 +98,26 @@ download_kmc:
-mkdir -p $(OUT_BIN_DIR)
./download_kmc.sh $(OUT_BIN_DIR)

nomad:
cp src/nomad.py bin/nomad
splash:
cp src/splash.py bin/splash
supervised_test:
cp src/supervised_test/supervised_test.R bin

install: all
install bin/* /usr/local/bin

uninstall:
-rm /usr/local/bin/satc
-rm /usr/local/bin/satc
-rm /usr/local/bin/satc_dump
-rm /usr/local/bin/satc_merge
-rm /usr/local/bin/sig_anch
-rm /usr/local/bin/nomad
-rm /usr/local/bin/sig_anch
-rm /usr/local/bin/splash
-rm /usr/local/bin/kmc
-rm /usr/local/bin/kmc_tools

clean:
clean:
-rm $(SATC_MAIN_DIR)/*.o
-rm $(SATC_MAIN_DIR)/kmc_api/*.o
-rm $(COMMON_DIR)/kmc_api/*.o
-rm $(SATC_MERGE_MAIN_DIR)/*.o
-rm $(SATC_DUMP_MAIN_DIR)/*.o
-rm $(COMMON_DIR)/*.o
Expand Down
2 changes: 1 addition & 1 deletion NOMAD_extendor_classification.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ riffle <- function(a, b) { # this function interleaves the elements of two vec
##################################################
############ input arguments #####################
args <- commandArgs(trailingOnly = TRUE)
directory = args[1] # the output directory used for the NOMAD run
directory = args[1] # the output directory used for the SPLASH run
which_anchors_file = args[2] # flag to decide which anchor file (after correction or all anchors) to use, could be "after_correction" or "all"
effect_size_cutoff = args[3] # the effect size cutoff for significant anchors (default 0.2)
num_samples_cutoff = args[4] # the minimum number of samples for an anchor to be called (default 20)
Expand Down
80 changes: 45 additions & 35 deletions README.md

Large diffs are not rendered by default.

16 changes: 9 additions & 7 deletions build_release.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,21 @@ def replace_in_file(file_path, search_text, new_text):
new_line = line.replace(search_text, new_text)
print(new_line, end='')

def get_ver(nomad_path):
with open(nomad_path) as f:
def get_ver(splash_path):
with open(splash_path) as f:
for line in f.readlines():
line = line.strip()
if "NOMAD_VERSION" in line:
if "SPLASH_VERSION" in line:
return line.split("=")[-1].strip().split("\"")[1]
print("Error: cannot read NOMAD_VERSION")
print("Error: cannot read SPLASH_VERSION")
sys.exit(1)

def run_cmd(cmd):
    """Run `cmd` through the shell and block until the child process exits.

    The command's exit status is not inspected and nothing is returned.
    """
    child = subprocess.Popen(cmd, shell=True)
    # No pipes are attached, so communicate() just waits for the child to finish.
    child.communicate()

run_cmd("git submodule init")
run_cmd("git submodule update")
run_cmd("make clean")
run_cmd("make -j32 release")

Expand All @@ -37,12 +39,12 @@ def run_cmd(cmd):
with open("bin/example/run-example.sh", "w") as f:
f.write("#!/bin/bash\n")
f.write("./download.py\n")
f.write("../nomad --bin_path .. input.txt\n")
f.write("../splash --bin_path .. input.txt\n")

os.chmod("bin/example/run-example.sh", os.stat("bin/example/run-example.sh").st_mode | stat.S_IEXEC)


ver = get_ver("bin/nomad")
ver = get_ver("bin/splash")

run_cmd(f"cd bin; tar -c * | pigz > ../nomad-{ver}.linux.x64.tar.gz; cd ..;")
run_cmd(f"cd bin; tar -c * | pigz > ../splash-{ver}.linux.x64.tar.gz; cd ..;")
run_cmd("rm -rf bin")
6 changes: 3 additions & 3 deletions example/analysis_scripts/c_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
"metadata": {},
"outputs": [],
"source": [
"NOMAD2_result_folder=''"
"SPLASH2_result_folder=''"
]
},
{
Expand All @@ -41,7 +41,7 @@
"outputs": [],
"source": [
"### read in p-values, afetr_correction.scores.csv file\n",
"fname=NOMAD2_result_folder+'/result.after_correction.scores.tsv'\n",
"fname=SPLASH2_result_folder+'/result.after_correction.scores.tsv'\n",
"df = pd.read_csv(fname,sep='\\t')"
]
},
Expand Down Expand Up @@ -95,7 +95,7 @@
"dfArr = []\n",
"\n",
"##### path to intermediary_files output folder\n",
"fldrName= NOMAD2_result_folder+'/result_Cjs/'\n",
"fldrName= SPLASH2_result_folder+'/result_Cjs/'\n",
"\n",
"for fname in tqdm(glob.glob(fldrName+'bin*.cjs')):\n",
" dfArr.append(\n",
Expand Down
2 changes: 1 addition & 1 deletion example/analysis_scripts/plotGeneration.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def get_args():
parser.add_argument(
"--satc_dump_file",
type=str,
help='Path to satc_dump utility file (within NOMAD2, /bin/satc_dump)'
help='Path to satc_dump utility file (within SPLASH2, /bin/satc_dump)'
)

args = parser.parse_args()
Expand Down
129 changes: 129 additions & 0 deletions libs/refresh/deterministic_random.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#pragma once
#include <iostream>
#include <iterator>
#include <limits>
#include <random>
#include <stdexcept>
#include <tuple>
#include <type_traits>
#include <utility>

#undef min
#undef max

// Uniform integer distribution over the closed range [a, b] whose mapping from
// generator output to result is spelled out explicitly in operator() below
// (rejection step followed by a modulo), instead of being delegated to
// std::uniform_int_distribution. The public interface mirrors the standard
// distribution's.
// NOTE(review): presumably this exists so that results are identical across
// standard library implementations - confirm with the callers.
template<class IntType = int>
class det_uniform_int_distribution {
public:
	// types
	typedef IntType result_type;
	// Bounds are stored as IntType (not int) so that ranges wider than int,
	// e.g. 64-bit iterator differences, are carried without truncation.
	typedef std::pair<IntType, IntType> param_type;

	// constructors and reset functions
	// The (a, b) constructor stores the bounds as given and does not validate them.
	explicit det_uniform_int_distribution(IntType a = 0, IntType b = std::numeric_limits<IntType>::max());
	// Throws std::invalid_argument when parm.second < parm.first.
	explicit det_uniform_int_distribution(const param_type& parm);
	// No-op: the distribution keeps no state between draws.
	void reset();

	// generating functions
	// Draws a value in [a(), b()] using the stored bounds.
	template<class URNG>
	result_type operator()(URNG& g);
	// Draws a value in [parm.first, parm.second]; the stored bounds are ignored.
	template<class URNG>
	result_type operator()(URNG& g, const param_type& parm);

	// property functions
	result_type a() const;
	result_type b() const;
	param_type param() const;
	// Replaces the stored bounds; throws std::invalid_argument (and leaves the
	// current bounds untouched) when parm.second < parm.first.
	void param(const param_type& parm);
	result_type min() const;
	result_type max() const;

private:
	// Unsigned counterpart of IntType, used for all range arithmetic so that
	// b - a + 1 wraps instead of overflowing.
	typedef typename std::make_unsigned<IntType>::type diff_type;

	IntType lower;
	IntType upper;
};

template<class IntType>
det_uniform_int_distribution<IntType>::det_uniform_int_distribution(IntType a, IntType b)
	: lower(a), upper(b) {
}

template<class IntType>
det_uniform_int_distribution<IntType>::det_uniform_int_distribution(const param_type& parm) {
	param(parm);
}

template<class IntType>
void det_uniform_int_distribution<IntType>::reset() {}

template<class IntType>
template<class URNG>
auto det_uniform_int_distribution<IntType>::operator()(URNG& g) -> result_type {
	return (*this)(g, param());
}

template<class IntType>
template<class URNG>
auto det_uniform_int_distribution<IntType>::operator()(URNG& g, const param_type& parm) -> result_type {
	const diff_type base = static_cast<diff_type>(parm.first);
	const diff_type diff = static_cast<diff_type>(parm.second) - base + 1;
	if (diff == 0) // If the +1 overflows we are using the full range, just return g()
		return static_cast<result_type>(g());

	// Number of complete diff-sized buckets that fit below the maximum of
	// diff_type; draws falling outside those buckets are rejected so that every
	// residue is equally likely.
	// NOTE(review): this assumes g() spans the whole range of diff_type. With a
	// narrower generator (e.g. a 32-bit engine and a 64-bit IntType) the result
	// cannot exceed the generator's maximum - confirm that callers pair
	// compatible widths. The algorithm is intentionally left as is so existing
	// random streams stay reproducible.
	const diff_type badDistLimit = std::numeric_limits<diff_type>::max() / diff;
	for (;;) {
		const diff_type generatedRand = static_cast<diff_type>(g());

		if (generatedRand / diff < badDistLimit)
			return static_cast<result_type>((generatedRand % diff) + base);
	}
}

template<class IntType>
auto det_uniform_int_distribution<IntType>::a() const -> result_type {
	return lower;
}

template<class IntType>
auto det_uniform_int_distribution<IntType>::b() const -> result_type {
	return upper;
}

template<class IntType>
auto det_uniform_int_distribution<IntType>::param() const -> param_type {
	return param_type(lower, upper);
}

template<class IntType>
void det_uniform_int_distribution<IntType>::param(const param_type& parm) {
	// Validate before touching the members so a rejected pair leaves the
	// distribution in its previous, usable state.
	if (parm.second < parm.first)
		throw std::invalid_argument("det_uniform_int_distribution: upper bound is smaller than lower bound");
	lower = parm.first;
	upper = parm.second;
}

template<class IntType>
auto det_uniform_int_distribution<IntType>::min() const -> result_type {
	return lower;
}

template<class IntType>
auto det_uniform_int_distribution<IntType>::max() const -> result_type {
	return upper;
}




template<class RandomIt, class URBG>
void partial_shuffle(RandomIt first, RandomIt middle, RandomIt last, URBG&& g)
{
typedef typename std::iterator_traits<RandomIt>::difference_type diff_t;
typedef det_uniform_int_distribution<diff_t> distr_t;
typedef typename distr_t::param_type param_t;

distr_t D;
diff_t n = middle - first;
diff_t N = last - first - 1;
for (diff_t i =0; i < n; ++i) {
using std::swap;
swap(first[i], first[D(g, param_t(i, N))]);
}
}


5 changes: 5 additions & 0 deletions libs/refresh/parallel-queues.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,11 @@ namespace refresh {
}
}

bool check_completed() {
	// Hold mtx for the duration of the read so the flag is sampled under the
	// same lock the other members of this class take.
	const std::lock_guard<std::mutex> guard(mtx);
	const bool completed_now = is_completed;
	return completed_now;
}

void cancel()
{
std::lock_guard<std::mutex> lck(mtx);
Expand Down
Loading

0 comments on commit a00034d

Please sign in to comment.